source: trunk/source/lisp-kernel/arm-asmutils.s @ 15093

Last change on this file since 15093 was 15093, checked in by gb, 10 years ago

New Linux ARM binaries.

The image and FASL versions changed on the ARM, but (if I did it right)
not on other platforms.

(The image and FASL versions are now architecture-specific. This may
make it somewhat easier and less disruptive to change them, since the
motivation for such a change is often also architecture-specific.)
The FASL and current image version are defined (in the "TARGET" package)
in the architecture-specific *-arch.lisp files; the min, max, and current
image versions are defined in the *constants*.h file for the architecture.

Most of the changes are ARM-specific.

Each TCR now contains a 256-word table at byte offset 256. (We've
been using about 168 bytes in the TCR, so there are still 88 bytes/22
words left for expansion.) The table is initialized at TCR-creation
time to contain the absolute addresses of the subprims (there are
currently around 130 defined); we try otherwise not to reference
subprims by absolute address. Jumping to a subprim is:

(ldr pc (:@ rcontext (:$ offset-of-subprim-in-tcr-table)))

and calling one involves loading its address from that table into a
register and doing (blx reg). We canonically use LR as the register,
since it's going to be clobbered by the blx anyway and there doesn't
seem to be a performance hazard there. The old scheme (which involved
using BA and BLA pseudoinstructions to jump to/call a hidden jump table
at the end of the function) is no longer supported.
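
In the new scheme, then, a call looks something like this (a sketch in
assembler syntax; "rcontext" names the dedicated TCR register, and the
offset name is illustrative):

  ldr lr,[rcontext,#offset-of-subprim-in-tcr-table]  /* fetch the subprim's address */
  blx lr                                             /* call it; LR is clobbered by the BLX anyway */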

ARM Subprims no longer need to be aligned (on anything more than an
instruction boundary.) Some remnants of the consequences of an old
scheme (where subprims had to "fit" in small regions and sometimes
had to jump out of line if they would overflow that region's bounds)
still remain, but we can repair that (and it'll be a bit more straightforward
to add new ARM subprims.) We no longer care (much) about where subprims
are mapped in memory, and don't have to bias subprimitive addresses by
a platform-specific constant (and figure out whether or not we've
already done so) on (e.g.) Android.

Rather than setting the first element (fn.entrypoint) of a
newly-created function to the (absolute) address of a subprim that updates
that entrypoint on the first call, we use a little LAP function to correct
the address before the function can be called.
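
The fixup amounts to something like the following (a sketch; the element
offsets and names are illustrative, not CCL's actual definitions):

  ldr r1,[r0,#fn.codevector]   /* r0 = the function; element 1 is the code-vector */
  add r1,r1,#codevector.data   /* address of the code-vector's first instruction */
  str r1,[r0,#fn.entrypoint]   /* element 0: the entrypoint */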

Non-function objects that can be stored in symbols' function cells
(the UNDEFINED-FUNCTION object, the things that encapsulate
special-operator names and global macro-functions) need to be
structured like FUNCTIONS: they need to have a word-aligned entrypoint
in element 0 that tracks the CODE-VECTOR object in element 1.  We
don't want these things to be of type FUNCTION, but do want the GC to
adjust the entrypoint if the codevector moves. We've been essentially
out of GVECTOR subtags on 32-bit platforms, largely because of the
constraints that vector/array subtags must be greater than all other
subtags and that numeric typecodes must be less.  The first constraint is probably
reasonable, but the second isn't: other typecodes (tag-list, etc) may
be less than the maximum numeric typecode, so tests like NUMBERP can't
reliably involve a simple comparison. (As long as a mask of all
numeric typecodes will fit in a machine word/FIXNUM, a simple LOGBITP
test can be used instead.) Removed all portable and ARM-specific code
that made assumptions about numeric typecode ordering, made a few more
gvector typecodes available, and used one of them to define a new
"pseudofunction" type. Made the GC update the entrypoints of
pseudofunctions and used them for the undefined-function object and
for the function cells of macros/special-operators.
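
To illustrate the LOGBITP-style test mentioned above: with the typecode
in a register and a mask whose Nth bit is set iff typecode N is numeric,
NUMBERP becomes a single bit test (a sketch; the mask's contents are
hypothetical):

  mov r2,#1
  tst r1,r2,lsl r0    /* r0 = typecode, r1 = mask; NE iff the typecode's bit is set */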

Since we don't need the subprim jump table at the end of each function
anymore, we can more easily revive the idea of embedded pc-relative
constant data ("constant pools") and initialize FPRs from constant
data, avoiding most remaining traffic between FPRs and GPRs.
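
E.g., a double-float constant can be loaded directly into an FPR from an
embedded pool (a minimal sketch; the label and value are illustrative):

  vldr d0,Lfpconst    /* pc-relative load: initialize an FPR straight from the pool */
  bx lr
Lfpconst:
  .double 1.5         /* embedded constant data */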

I've had a fairly-reproducible cache-coherency problem: on the first
GC in the cold load, the thread misbehaves mysteriously when it
resumes. The GC tries to synchronize the I and D caches on the entire
range of addresses that may contain newly-moved code-vectors. I'm not
at all sure why, but walking that range and flushing the cache for
each code-vector individually seems to avoid the problem (and may actually
be faster.)

Fix ticket:894

Fixed a few typos in error messages/comments/etc.

I -think- that the non-ARM-specific changes (how FASL/image versions are
defined) should bootstrap cleanly, but won't know for sure until this is
committed. (I imagine that the buildbot will complain if not.)

/*   Copyright (C) 2009 Clozure Associates */
/*   Copyright (C) 1994-2001 Digitool, Inc */
/*   This file is part of Clozure CL. */

/*   Clozure CL is licensed under the terms of the Lisp Lesser GNU Public */
/*   License, known as the LLGPL and distributed with Clozure CL as the */
/*   file "LICENSE".  The LLGPL consists of a preamble and the LGPL, */
/*   which is distributed with Clozure CL as the file "LGPL".  Where these */
/*   conflict, the preamble takes precedence. */

/*   Clozure CL is referenced in the preamble as the "LIBRARY." */

/*   The LLGPL is also available online at */
/*   http://opensource.franz.com/preamble.html */


        .syntax unified
        .arm

        include(lisp.s)

        _beginfile

/* Force data in the range [r0, r0+r1) from the dcache into the icache */
_exportfn(C(flush_cache_lines))
        __ifdef(`LINUX')
        __(add r1,r1,r0)        /* r1 = end address of the range */
        __(mov r2,#0)           /* options.  Pass as 0 until we know better */
        __(mov r12,r7)          /* preserve r7; r12 is saved by the syscall */
        __(mov r7,#0x0f0000)    /* __ARM_NR_cacheflush ... */
        __(add r7,r7,#2)        /* ... = 0x0f0002 */
        __(svc #0)
        __(mov r7,r12)          /* restore r7 */
        __endif
        __ifdef(`DARWIN')
        __(mov r3,#0)
        __(mov r12,#0x80000000) /* Darwin platform-specific (cache-flush) trap */
        __(svc #0)
        __endif
        __(isb sy)              /* instruction synchronization barrier */
        __(bx lr)

/* Try to store to the page containing r0; return 1.  touch_page_end */
/* marks the end of the code that might fault. */
_exportfn(C(touch_page))
        __(str r0,[r0,#0])
        __(mov r1,#0)
        __(str r1,[r0,#0])
        __(mov r0,#1)
        .globl C(touch_page_end)
C(touch_page_end):
        __(bx lr)
_endfn

_exportfn(C(current_stack_pointer))
        __(mov r0,sp)
        __(bx lr)
_endfn

_exportfn(C(count_leading_zeros))
        __(clz r0,r0)
        __(bx lr)
_endfn

_exportfn(C(noop))
        __(bx lr)
_endfn


/* Atomically store new value (r2) in *r0, if old value == expected (r1). */
/* Return actual old value. */

_exportfn(C(store_conditional))
0:      __(ldrex r3,[r0])       /* load-exclusive: r3 = current value */
        __(cmp r3,r1)
        __(bne 1f)              /* mismatch: fail */
        __(strex ip,r2,[r0])    /* try to store r2; ip = 0 on success */
        __(cmp ip,#0)
        __(bne 0b)              /* lost the reservation: retry */
        __(b 2f)
1:      __(clrex)               /* clear the exclusive monitor */
2:      __(mov r0,r3)           /* return the old value */
        __(bx lr)
_endfn

/* Atomically store new_value (r1) in *r0; return previous contents */
/* of *r0. */

_exportfn(C(atomic_swap))
        __(mov r2,r0)           /* free r0 for the return value */
0:      __(ldrex r0,[r2])       /* r0 = old value */
        __(strex r3,r1,[r2])    /* try to store r1 */
        __(cmp r3,#0)
        __(bne 0b)              /* retry until the store-exclusive succeeds */
        __(bx lr)
_endfn

/* Logior the value in *r0 with the value in r1 (presumably a bitmask with exactly 1 */
/* bit set.)  Return the previous contents of *r0, which is non-zero in the bitmask's */
/* bit if that bit was already set. */

_exportfn(C(atomic_ior))
        __(stmdb sp!,{r4,lr})   /* need a scratch register for the strex status */
0:      __(ldrex r2,[r0])       /* r2 = old value */
        __(orr r3,r2,r1)
        __(strex r4,r3,[r0])
        __(cmp r4,#0)
        __(bne 0b)              /* retry on contention */
        __(mov r0,r2)           /* return the old value */
        __(ldmia sp!,{r4,pc})
_endfn


/* Logand the value in *r0 with the value in r1 (presumably a bitmask with exactly 1 */
/* bit clear.)  Return the value now in *r0 (for some value of "now".) */

_exportfn(C(atomic_and))
0:      __(ldrex r2,[r0])       /* r2 = old value */
        __(and r2,r2,r1)
        __(strex r3,r2,[r0])
        __(cmp r3,#0)
        __(bne 0b)              /* retry on contention */
        __(mov r0,r2)           /* return the new value */
        __(bx lr)
_endfn


        __ifdef(`DARWIN')
_exportfn(C(enable_fp_exceptions))
        __(.long 0)
        __(bx lr)
_endfn

_exportfn(C(disable_fp_exceptions))
        __(.long 0)
        __(bx lr)
_endfn

_exportfn(C(pseudo_sigreturn))
        __(uuo_pseudo_sigreturn())
        __(b C(pseudo_sigreturn))
_endfn
        __endif

/* Unimplemented stubs: trap to the debugger if called. */
_exportfn(C(save_fp_context))
        __(uuo_debug_trap(al))
_endfn
_exportfn(C(restore_fp_context))
        __(uuo_debug_trap(al))
_endfn
_exportfn(C(put_vector_registers))
        __(uuo_debug_trap(al))
_endfn
_exportfn(C(get_vector_registers))
        __(uuo_debug_trap(al))
_endfn

        __ifdef(`ANDROID')
_exportfn(rt_sigprocmask)
        __(stmdb sp!,{r7,lr})
        __(mov r7,#175)         /* __NR_rt_sigprocmask */
        __(svc #0)
        __(ldmia sp!,{r7,pc})
_endfn
        __endif


        __ifdef(`DARWIN')
/* Divide the 64-bit unsigned integer in r0/r1 by the 64-bit unsigned
   integer in r2/r3; return the 64-bit quotient in r0/r1 and the 64-bit
   remainder in r2/r3.  Implement this in terms of the libgcc function:

   unsigned long long __udivmoddi4 (unsigned long long a,
                                    unsigned long long b,
                                    unsigned long long *c)
*/
_exportfn(C(__aeabi_uldivmod))
        __(stmdb sp!,{r7,lr})
        __(mov r7,sp)
        __(sub sp,sp,#8)        /* allocate space for the remainder */
        __(mov ip,sp)
        __(push1(ip,sp))        /* push the remainder pointer twice, */
        __(push1(ip,sp))        /* keeping sp 8-byte aligned */
        __(bl C(__udivmoddi4))
        __(add sp,sp,#8)        /* discard the pushed pointer words */
        __(ldmia sp!,{r2,r3})   /* pop the remainder into r2/r3 */
        __(ldmia sp!,{r7,pc})
_endfn
        __endif

_exportfn(call_handler_on_main_stack)
        __(ldr ip,[sp])         /* handler address (passed on the stack) */
        __(mov lr,r3)           /* return address */
        __(mov sp,r1)           /* switch to the main stack */
        __(bx ip)               /* jump to the handler */
_endfn


        _endfile