source: trunk/source/lisp-kernel/arm-asmutils.s @ 15229

Last change on this file since 15229 was 15229, checked in by gb, 7 years ago

When the GC tries to zero out recycled pages, it's traditionally
done so by calling memset(p,0,n). Profiling output indicates that
a lot of time is spent in this call to memset. When the GC uses
it, the address p is dnode-aligned and n is a multiple of the dnode
size, so try to exploit this by using a new function zero_dnodes().
The initial implementations may be faster than memset and there's
lots of room for easy improvement. (The PPC and ARM versions aren't
tested yet.)

File size: 5.2 KB
Line 
/*   Copyright (C) 2009 Clozure Associates */
/*   Copyright (C) 1994-2001 Digitool, Inc */
/*   This file is part of Clozure CL. */

/*   Clozure CL is licensed under the terms of the Lisp Lesser GNU Public */
/*   License , known as the LLGPL and distributed with Clozure CL as the */
/*   file "LICENSE".  The LLGPL consists of a preamble and the LGPL, */
/*   which is distributed with Clozure CL as the file "LGPL".  Where these */
/*   conflict, the preamble takes precedence. */

/*   Clozure CL is referenced in the preamble as the "LIBRARY." */

/*   The LLGPL is also available online at */
/*   http://opensource.franz.com/preamble.html */
15
16
/* Assemble everything below as ARM (A32) code in unified syntax. */
        .syntax unified
        .arm

/* Kernel-wide macros and constants come from lisp.s. */
        include(lisp.s)

        _beginfile

/* Force data from r0, size r1 into the icache */
/*   In:  r0 = start address, r1 = size in bytes */
/*   Out: nothing is set up here; r0 holds whatever the OS call left in it */
/* The range is handed to an OS-specific call and the pipeline is then */
/* resynchronized with an instruction barrier. */
_exportfn(C(flush_cache_lines))
        __ifdef(`LINUX')
        __(add r1,r1,r0)        /* r1 = end address = start + size */
        __(mov r2,#0)           /* options.  Pass as 0 until we know better */
        __(mov r12,r7)          /* preserve r7 ;  r12 saved by syscall */
        __(mov r7,#0x0f0000)    /* __ARM_NR_cacheflush (0x0f0002), built */
        __(add r7,r7,#2)        /* in two steps */
        __(svc #0)
        __(mov r7,r12)          /* put the caller r7 back */
        __endif
        __ifdef(`DARWIN')
        /* NOTE(review): r3 = 0 with r12 = 0x80000000 presumably selects */
        /* the Darwin icache-invalidate trap - confirm. */
        __(mov r3,#0)
        __(mov r12,#0x80000000)
        __(svc #0)
        __endif
        __(isb sy)
        __(bx lr)
_endfn

/* touch_page(addr): write to the word at r0 twice (first the address */
/* itself, then 0, so the word ends up zero) and return 1 in r0. */
/* touch_page_end is exported and marks the final instruction. */
/* NOTE(review): presumably a fault handler resumes at touch_page_end */
/* when one of the stores faults - confirm against the exception code. */
/*   Scratch: r1 */
_exportfn(C(touch_page))
        __(str r0,[r0,#0])      /* probe: any value will do */
        __(mov r1,#0)
        __(str r1,[r0,#0])      /* leave the word zeroed */
        __(mov r0,#1)           /* result when both stores completed */
        .globl C(touch_page_end)
C(touch_page_end):
        __(bx lr)
_endfn

/* current_stack_pointer(): return the value of sp at the time of the call. */
/*   Out: r0 = sp */
_exportfn(C(current_stack_pointer))
        __(mov r0,sp)
        __(bx lr)
_endfn

/* count_leading_zeros(w): number of zero bits above the highest set bit */
/* of the 32-bit value in r0 (32 when r0 is 0). */
/*   In:  r0 = word */
/*   Out: r0 = count */
_exportfn(C(count_leading_zeros))
        __(clz r0,r0)
        __(bx lr)
_endfn

/* noop(): return immediately; every register is left as it was. */
_exportfn(C(noop))
        __(bx lr)
_endfn


/* Atomically store new value (r2) in *r0, if old value == expected (r1). */
/* Return actual old value. */
/*   In:  r0 = address, r1 = expected value, r2 = new value */
/*   Out: r0 = value observed in *r0 ; it equals r1 exactly when the */
/*        store took place */
/*   Scratch: r3, ip, flags */

_exportfn(C(store_conditional))
0:      __(ldrex r3,[r0])       /* r3 = current contents, exclusive load */
        __(cmp r3,r1)
        __(bne 1f)              /* not the expected value: do not store */
        __(strex ip,r2,[r0])    /* ip = 0 when the store succeeded */
        __(cmp ip,#0)
        __(bne 0b)              /* reservation lost: start over */
        __(b 2f)
1:      __(clrex)               /* drop the reservation that will not be used */
2:      __(mov r0,r3)
        __(bx lr)
_endfn

/* Atomically store new_value(r1) in *r0 ;  return previous contents */
/* of *r0. */
/*   In:  r0 = address, r1 = new value */
/*   Out: r0 = value that was in memory when the store succeeded */
/*   Scratch: r2, r3, flags */

_exportfn(C(atomic_swap))
        __(mov r2,r0)           /* keep the address; r0 becomes the result */
0:      __(ldrex r0,[r2])
        __(strex r3,r1,[r2])    /* r3 = 0 when the store succeeded */
        __(cmp r3,#0)
        __(bne 0b)              /* reservation lost: reload and retry */
        __(bx lr)
_endfn

/* Logior the value in *r0 with the value in r1 (presumably a bitmask with exactly 1 */
/* bit set.)  Return non-zero if any of the bits in that bitmask were already set. */
/*   In:  r0 = address, r1 = mask */
/*   Out: r0 = (old contents of *r0) AND mask */
/*   Scratch: r2, r3, ip, flags */
/* The result is masked so that unrelated bits which happen to be set in */
/* the old word cannot make the result non-zero. */

_exportfn(C(atomic_ior))
0:      __(ldrex r2,[r0])       /* r2 = old value */
        __(orr r3,r2,r1)        /* r3 = old value with the mask bits set */
        __(strex ip,r3,[r0])    /* ip = 0 when the store succeeded */
        __(cmp ip,#0)
        __(bne 0b)              /* reservation lost: reload and retry */
        __(and r0,r2,r1)        /* report only the mask bits of the old value */
        __(bx lr)
_endfn


/* Logand the value in *r0 with the value in r1 (presumably a bitmask with exactly 1 */
/* bit set.)  Return the value now in *r0 (for some value of "now".) */
/*   In:  r0 = address, r1 = mask */
/*   Out: r0 = the value this call stored, i.e. (old contents) AND mask */
/*   Scratch: r2, r3, flags */

_exportfn(C(atomic_and))
0:      __(ldrex r2,[r0])
        __(and r2,r2,r1)        /* r2 = value to store, also the result */
        __(strex r3,r2,[r0])    /* r3 = 0 when the store succeeded */
        __(cmp r3,#0)
        __(bne 0b)              /* reservation lost: reload and retry */
        __(mov r0,r2)
        __(bx lr)
_endfn


/* The three entry points below are assembled for DARWIN only. */
        __ifdef(`DARWIN')
/* enable_fp_exceptions: one zero word followed by a return. */
/* NOTE(review): the zero word is presumably a placeholder (it decodes */
/* as andeq r0,r0,r0, which changes nothing) - confirm intent. */
_exportfn(C(enable_fp_exceptions))
        __(.long 0)
        __(bx lr)
_endfn

/* disable_fp_exceptions: same shape as enable_fp_exceptions. */
_exportfn(C(disable_fp_exceptions))
        __(.long 0)
        __(bx lr)
_endfn

/* pseudo_sigreturn: issue the pseudo_sigreturn UUO; if execution ever */
/* continues past it, branch back and issue it again. */
_exportfn(C(pseudo_sigreturn))
        __(uuo_pseudo_sigreturn())
        __(b C(pseudo_sigreturn))
_endfn
        __endif

/* save_fp_context: the body is only an unconditional (al) debug-trap UUO. */
/* No return instruction follows it. */
/* NOTE(review): presumably a not-implemented stub on ARM - confirm. */
_exportfn(C(save_fp_context))
        __(uuo_debug_trap(al))
_endfn
/* restore_fp_context: the body is only an unconditional (al) debug-trap UUO. */
/* No return instruction follows it. */
/* NOTE(review): presumably a not-implemented stub on ARM - confirm. */
_exportfn(C(restore_fp_context))
        __(uuo_debug_trap(al))
_endfn
/* put_vector_registers: the body is only an unconditional (al) debug-trap UUO. */
/* No return instruction follows it. */
/* NOTE(review): presumably a not-implemented stub on ARM - confirm. */
_exportfn(C(put_vector_registers))
        __(uuo_debug_trap(al))
_endfn
/* get_vector_registers: the body is only an unconditional (al) debug-trap UUO. */
/* No return instruction follows it. */
/* NOTE(review): presumably a not-implemented stub on ARM - confirm. */
_exportfn(C(get_vector_registers))
        __(uuo_debug_trap(al))
_endfn

/* rt_sigprocmask: raw system-call wrapper, assembled for ANDROID only. */
/* The argument registers are handed to the kernel untouched and the */
/* value the kernel leaves in r0 is returned as is. */
/* r7 carries the call number across the svc and is restored on exit. */
/* NOTE(review): 175 is presumably __NR_rt_sigprocmask for ARM EABI - confirm. */
        __ifdef(`ANDROID')
_exportfn(rt_sigprocmask)
        __(stmdb sp!,{r7,lr})   /* r7 is about to be overwritten */
        __(mov r7,#175)
        __(svc #0)
        __(ldmia sp!,{r7,pc})   /* restore r7 and return */
_endfn
        __endif


/* __aeabi_uldivmod is assembled for DARWIN only. */
        __ifdef(`DARWIN')
/* divide the 64-bit unsigned integer in r0/r1 by the 64-bit unsigned
   integer in r2/r3; return the 64-bit quotient in r0/r1 and the 64-bit
   remainder in r2/r3.  Implement this in terms of the libgcc function:

   unsigned long long __udivmoddi4 (unsigned long long a,
                                    unsigned long long b,
                                    unsigned long long *c)

   a and b already sit in r0/r1 and r2/r3, so only the pointer c has to
   be supplied, on the stack.  It points at an 8-byte slot reserved in
   this frame, from which the remainder is loaded after the call.
*/
_exportfn(C(__aeabi_uldivmod))
        __(stmdb sp!,{r7,lr})
        __(mov r7,sp)           /* r7 = frame pointer */
        __(sub sp,sp,#8)        /* 8-byte slot for the remainder */
        __(mov ip,sp)           /* ip = address of that slot */
        /* NOTE(review): the pointer is pushed twice, presumably so that */
        /* sp stays 8-byte aligned at the call - confirm. */
        __(push1(ip,sp))
        __(push1(ip,sp))
        __(bl C(__udivmoddi4))
        __(add sp,sp,#8)        /* discard the two pointer words */
        __(ldmia sp!,{r2,r3})   /* r2/r3 = remainder */
        __(ldmia sp!,{r7,pc})
_endfn
        __endif

/* call_handler_on_main_stack: switch stacks and jump to a handler. */
/*   In:  r1   = value to install as sp */
/*        r3   = value to install as lr, i.e. where the handler returns */
/*        [sp] = address to jump to (the word at the top of the stack */
/*               on entry) */
/*   r0 and r2 are passed through to the handler untouched. */
/*   Control does not come back here: the handler returns through r3. */
_exportfn(call_handler_on_main_stack)
        __(ldr ip,[sp])         /* fetch the target before sp changes */
        __(mov lr,r3)
        __(mov sp,r1)
        __(bx ip)
_endfn

/* zero N (r1) dnodes, starting at the dnode-aligned address in r0 */
/*   In:  r0 = first dnode, r1 = dnode count (0 is fine: nothing is written) */
/*   Scratch: r0, r1, r2, d0, flags */
/* Each pass stores one 8-byte zero and then advances r0 by dnode_size. */
_exportfn(C(zero_dnodes))
        __(cmp r1,#0)           /* Z is set up front so a zero count skips the loop */
        __(adr r2,2f)
        __(fldd d0,[r2,#0])     /* d0 = 64 zero bits from the literal below */
        __(b 1f)
0:      __(subs r1,r1,#1)       /* Z becomes set on the last dnode */
        __(fstd d0,[r0])        /* the store and the add leave the flags alone */
        __(add r0,r0,#dnode_size)
1:      __(bne 0b)
        __(bx lr)
        .align 3
2:      .long 0
        .long 0
_endfn

/* Close the file opened by _beginfile. */
        _endfile

Note: See TracBrowser for help on using the repository browser.