source: trunk/source/lisp-kernel/x86-macros.s @ 8562

Last change on this file since 8562 was 8562, checked in by andreas, 13 years ago

Access TCR through a macro, with different implementations for UNIXen (gs-segment-based
addressing) and Windows (keep TCR pointer in a GPR). Choice of r11 for TCR is purely arbitrary,
might still change, and requires more changes in the rest of the assembler base that are not in
place yet.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
Line 
1/*   Copyright (C) 2005 Clozure Associates  */
2/*   This file is part of OpenMCL.    */
3
4/*   OpenMCL is licensed under the terms of the Lisp Lesser GNU Public  */
5/*   License , known as the LLGPL and distributed with OpenMCL as the  */
6/*   file "LICENSE".  The LLGPL consists of a preamble and the LGPL,  */
7/*   which is distributed with OpenMCL as the file "LGPL".  Where these  */
8/*   conflict, the preamble takes precedence.    */
9
10/*   OpenMCL is referenced in the preamble as the "LIBRARY."  */
11
12/*   The LLGPL is also available online at  */
13/*   http://opensource.franz.com/preamble.html  */
14
15
16/* Try to make macros follow GAS/ATT conventions, where source precedes  */
17/* destination.  */
18
/* Name of a lisp global: expands to the field $1 of lisp_globals.  */
19define([lisp_global],[lisp_globals.$1])
20                                       
/* Load the lisp global named by $1 into $2.  */
21define([ref_global],[
22        __(mov lisp_global($1),$2)
23])
24
/* Store $1 into the lisp global named by $2.  */
25define([set_global],[
26        __(mov $1,lisp_global($2))
27])
28
/* Load the value cell (symbol.vcell) of the nrs symbol $1 into $2.  */
29define([ref_nrs_value],[
30        __(mov nrs.$1+symbol.vcell,$2)
31])
32       
/* Store $1 into the value cell (symbol.vcell) of the nrs symbol $2.  */
33define([set_nrs_value],[
34        __(mov $1,nrs.$2+symbol.vcell)
35])
36                                                       
/* Copy $1 to $2, then arithmetic-shift $2 right by fixnumshift bits.  */
37define([unbox_fixnum],[
38        __(mov $1,$2)
39        __(sar [$]fixnumshift,$2)
40])
41
/* $2 = $1 * fixnumone, using the three-operand form of imulq.  Unlike the  */
/* lea-based variant that follows, this modifies the flags.  */
42define([box_fixnum],[
43        __(imulq [$]fixnumone,$1,$2)
44])     
45
46
47/* Like the imulq-based boxing macro above, but with no effect on flags:  */
/* lea computes $1*8 into $2.  NOTE(review): the scale of 8 is hard-coded  */
/* and presumably equals fixnumone - confirm.  */
48define([box_fixnum_no_flags],[
49        __(leaq (,$1,8),$2)
50])
51                               
52
53/* Zero $3 bytes worth of dnodes, starting at offset $2 relative  */
54/* to the base register $1.  */
/* Implemented as a recursive assembler macro: each expansion stores  */
/* %fpzero at the current displacement with movapd, then re-invokes itself  */
/* with the displacement advanced and the byte count reduced by dnode_size.  */
/* The Darwin variant refers to its arguments positionally, starting at $0,  */
/* and recurses while the count is nonzero; the other variant uses named  */
/* arguments and recurses while the count is greater than zero.  */
55
56
57ifdef([DarwinAssembler],[
58        .macro zero_dnodes
59        .if $2
60        __(movapd %fpzero,$1($0))
61        __(zero_dnodes $0,$1+dnode_size,$2-dnode_size)
62        .endif
63        .endmacro
64],[
65        .macro zero_dnodes base,disp,nbytes
66        .ifgt \nbytes
67        movapd %fpzero,\disp(\base)
68        zero_dnodes \base,"\disp+dnode_size","\nbytes-dnode_size"
69        .endif
70        .endm
71])     
72
73
74/* Allocate $1+dnode_size zeroed bytes on the tstack, using $2 as a temp  */
75/* reg.  */
/* The byte count actually subtracted from tcr.next_tsp is $1+node_size  */
/* rounded down to a dnode boundary, plus dnode_size.  After zeroing, the  */
/* previous tcr.save_tsp is stored at the base of the new frame, %rbp is  */
/* saved in it, and tcr.save_tsp is updated to $2, the new frame.  */
/* Clobbers %stack_temp.  */
76       
77define([TSP_Alloc_Fixed],[
78        define([TSP_Alloc_Size],[((($1+node_size) & ~(dnode_size-1))+dnode_size)])
79        __(subq [$]TSP_Alloc_Size,rcontext(tcr.next_tsp))
80        __(movq rcontext(tcr.save_tsp),%stack_temp)
81        __(movq rcontext(tcr.next_tsp),$2)
82        zero_dnodes $2,0,TSP_Alloc_Size
83        __(movq %stack_temp,($2))
84        __(movq %rbp,tsp_frame.save_rbp($2))
85        __(movq $2,rcontext(tcr.save_tsp))
86        undefine([TSP_Alloc_Size])
87])
88
89/* $1 = size (dnode-aligned, including tsp overhead), $2 = scratch.  */
90/* Modifies both $1 and $2; on exit, $2 = new_tsp+tsp_overhead, $1 = old tsp  */
/* The new frame is zeroed one dnode at a time with movapd from %fpzero,  */
/* while $1 counts down the bytes remaining.  The previous tcr.save_tsp is  */
/* then stored at the base of the new frame, %rbp is saved in it, and  */
/* tcr.save_tsp is updated to the new frame.  Clobbers %stack_temp.  */
91       
92define([TSP_Alloc_Var],[
93        new_macro_labels()
94        __(subq $1,rcontext(tcr.next_tsp))
95        __(movq rcontext(tcr.save_tsp),%stack_temp)
96        __(movq rcontext(tcr.next_tsp),$2)
97        __(jmp macro_label(test))
98macro_label(loop):
99        __(movapd %fpzero,0($2))
100        __(addq $dnode_size,$2)
101macro_label(test):     
102        __(subq $dnode_size,$1)
103        __(jge macro_label(loop))
104        __(movq rcontext(tcr.next_tsp),$2)
105        __(movd %stack_temp,$1)
106        __(movq $1,($2))
107        __(movq %rbp,tsp_frame.save_rbp($2))
108        __(movq $2,rcontext(tcr.save_tsp))
109        __(addq $dnode_size,$2)
110])
111       
112       
113
/* Allocate a zeroed catch frame on the tstack, with $1 as the register  */
/* that receives it.  A header word built from catch_frame.element_count  */
/* and subtag_catch_frame is stored at offset dnode_size in the new frame,  */
/* and $1 is then advanced by dnode_size+fulltag_misc so that it ends up  */
/* tagged with fulltag_misc.  Clobbers %stack_temp.  */
114define([Allocate_Catch_Frame],[
115        TSP_Alloc_Fixed(catch_frame.size,$1)
116        __(movq [$](catch_frame.element_count<<subtag_shift)|subtag_catch_frame,dnode_size($1))
117        __(addq [$]dnode_size+fulltag_misc,$1)
118])
119
120/* %arg_z = tag,  %xfn = pc, $1 = mvflag          */
/* Allocates a catch frame in %imm2 and fills it in: the tag, a link to  */
/* the previous tcr.catch_top, the mvflag, %rsp, %rbp, tcr.db_link, the  */
/* four save registers, tcr.xframe, tcr.foreign_sp and the pc.  The new  */
/* frame then becomes tcr.catch_top.  Clobbers %imm0, %imm1, %imm2 and  */
/* %stack_temp.  */
121       
122define([Make_Catch],[
123        Allocate_Catch_Frame(%imm2)
124        __(movq rcontext(tcr.catch_top),%imm0)
125        __(movq rcontext(tcr.db_link),%imm1)
126        __(movq %arg_z,catch_frame.catch_tag(%imm2))
127        __(movq %imm0,catch_frame.link(%imm2))
128        __(movq [$]$1,catch_frame.mvflag(%imm2))
129        __(movq rcontext(tcr.xframe),%imm0)
130        __(movq %rsp,catch_frame.rsp(%imm2))
131        __(movq %rbp,catch_frame.rbp(%imm2))
132        __(movq rcontext(tcr.foreign_sp),%stack_temp)
133        __(movq %imm1,catch_frame.db_link(%imm2))
134        __(movq %save3,catch_frame._save3(%imm2))
135        __(movq %save2,catch_frame._save2(%imm2))
136        __(movq %save1,catch_frame._save1(%imm2))
137        __(movq %save0,catch_frame._save0(%imm2))
138        __(movq %imm0,catch_frame.xframe(%imm2))
139        __(movq %stack_temp,catch_frame.foreign_sp(%imm2))
140        __(movq %xfn,catch_frame.pc(%imm2))
141        __(movq %imm2,rcontext(tcr.catch_top))
142])     
143
/* Variant of the preceding catch-frame macro, with the same inputs  */
/* (%arg_z = tag, %xfn = pc, $1 = mvflag).  The only difference in the  */
/* frame contents is that the saved rsp is the address node_size above  */
/* %rsp, computed with lea, rather than %rsp itself.  */
/* Clobbers %imm0, %imm1, %imm2 and %stack_temp.  */
144define([nMake_Catch],[
145        Allocate_Catch_Frame(%imm2)
146        __(movq rcontext(tcr.catch_top),%imm0)
147        __(movq rcontext(tcr.db_link),%imm1)
148        __(movq %arg_z,catch_frame.catch_tag(%imm2))
149        __(movq %imm0,catch_frame.link(%imm2))
150        __(lea node_size(%rsp),%imm0)
151        __(movq [$]$1,catch_frame.mvflag(%imm2))
152        __(movq %imm0,catch_frame.rsp(%imm2))
153        __(movq rcontext(tcr.xframe),%imm0)
154        __(movq %rbp,catch_frame.rbp(%imm2))
155        __(movq rcontext(tcr.foreign_sp),%stack_temp)
156        __(movq %imm1,catch_frame.db_link(%imm2))
157        __(movq %save3,catch_frame._save3(%imm2))
158        __(movq %save2,catch_frame._save2(%imm2))
159        __(movq %save1,catch_frame._save1(%imm2))
160        __(movq %save0,catch_frame._save0(%imm2))
161        __(movq %imm0,catch_frame.xframe(%imm2))
162        __(movq %stack_temp,catch_frame.foreign_sp(%imm2))
163        __(movq %xfn,catch_frame.pc(%imm2))
164        __(movq %imm2,rcontext(tcr.catch_top))
165])     
166               
167       
168/* Consing can get interrupted (either by PROCESS-INTERRUPT or by GC  */
169/* activity in some other thread); if it's interrupted, the interrupting  */
170/* process needs to be able to determine what's going on well enough  */
171/* to be able to either back out of the attempt or finish the job.  */
172/* That requires that we use easily recognized instruction sequences  */
173/* and follow certain conventions when consing (either in the kernel  */
174/* or in compiled code.)  (One of those conventions involves using  */
175/* %allocptr = %temp0 as a freepointer; when consing, %temp0 can't  */
176/* contain a live value.)  */
177/* Making a CONS cell is a little simpler than making a uvector.  */
178
179/* $1=new_car,$2=new_cdr,$3=dest   */
/* The new cell is always left in %allocptr; it is also copied to $3 when  */
/* $3 is non-empty.  tcr.save_allocptr is decremented by  */
/* cons.size-fulltag_cons, so it stays tagged until the andb below clears  */
/* its tag bits.  The allocation trap is executed when the new pointer is  */
/* not above tcr.save_allocbase (signed comparison).  */
180define([Cons],[
181        new_macro_labels()
182/* The instructions where tcr.save_allocptr is tagged are difficult  */
183/* to interrupt; the interrupting code has to recognize and possibly  */
184/* emulate the instructions in between   */
185        __(subq $cons.size-fulltag_cons,rcontext(tcr.save_allocptr))
186        __(movq rcontext(tcr.save_allocptr),%allocptr)
187        __(rcmpq(%allocptr,rcontext(tcr.save_allocbase)))
188        __(jg macro_label(no_trap))
189        uuo_alloc()
190macro_label(no_trap):   
191        __(andb $~fulltagmask,rcontext(tcr.save_allocptr))
192/* Easy to interrupt now that tcr.save_allocptr isn't tagged as a cons    */
193        __(movq $2,cons.cdr(%allocptr))
194        __(movq $1,cons.car(%allocptr))
195        ifelse($3,[],[],[
196         __(movq %allocptr,$3)
197        ])
198])
199
200/* The header has to be in %imm0, and the physical size in bytes has  */
201/*  to be in %imm1. We bash %imm1.   */
/* $1 = optional destination register; the new object is always left in  */
/* %allocptr.  fulltag_misc is subtracted from the size first, so the  */
/* decremented allocation pointer comes out offset by that tag.  */
202
203define([Misc_Alloc],[
204        __(subq [$]fulltag_misc,%imm1)
205        Misc_Alloc_Internal($1)
206])
207
/* Common tail of the uvector allocators.  On entry %imm0 holds the header  */
/* and %imm1 the byte count to subtract from tcr.save_allocptr (the callers  */
/* in this file have already reduced it by fulltag_misc).  The allocation  */
/* trap is executed when the new pointer is not above tcr.save_allocbase.  */
/* The header is stored before the tag bits of tcr.save_allocptr are  */
/* cleared.  The object is left in %allocptr and also copied to $1 when $1  */
/* is non-empty.  */
208define([Misc_Alloc_Internal],[                 
209/* Here Be Monsters: we have to treat some/all of this instruction   */
210/* sequence atomically, as soon as tcr.save_allocptr becomes tagged.  */
211               
212        new_macro_labels()
213        __(subq %imm1,rcontext(tcr.save_allocptr))
214        __(movq rcontext(tcr.save_allocptr),%allocptr)
215        __(rcmpq(%allocptr,rcontext(tcr.save_allocbase)))
216        __(jg macro_label(no_trap))
217        uuo_alloc()
218macro_label(no_trap):   
219        __(movq %imm0,misc_header_offset(%allocptr))
220        __(andb $~fulltagmask,rcontext(tcr.save_allocptr))
221/* Now that tcr.save_allocptr is untagged, it's easier to be interrupted   */
222        ifelse($1,[],[],[
223         __(mov %allocptr,$1)
224        ])
225])
226       
/* Allocate a uvector whose physical size in bytes is the assembly-time  */
/* constant $2.  $1 = optional destination register.  The header has to be  */
/* in %imm0; %imm1 is overwritten with $2-fulltag_misc.  */
227define([Misc_Alloc_Fixed],[
228        __(movq [$]$2-fulltag_misc,%imm1)
229        Misc_Alloc_Internal($1)
230])                                     
231
/* $1 = dest, $2 = vector, $3 = constant element index.  Loads the word at  */
/* misc_data_offset plus the index scaled by word_shift.  */
232define([vrefr],[
233        __(mov misc_data_offset+($3<<word_shift)($2),$1)
234])     
235
/* Indirect jump to the address held in %fn.  */
236define([jump_fn],[
237        __(jmpq *%fn)
238])
239                       
/* Load the function cell (symbol.fcell) of the symbol in %fname into %fn,  */
/* then jump through %fn.  */
240define([jump_fname],[
241        __(mov symbol.fcell(%fname),%fn)
242        jump_fn()
243])     
244       
/* Set %nargs to the constant $1 shifted left by fixnumshift.  For $1  */
/* greater than 15 a single movl is emitted; otherwise %nargs is cleared  */
/* with xorl and, when $1 is nonzero, the shifted value is added.  */
/* NOTE(review): the two-instruction form is presumably chosen for its  */
/* shorter encoding - confirm.  It modifies the flags; the movl form does  */
/* not.  */
245define([set_nargs],[
246        ifelse(eval($1>15),1,[
247        __(movl [$]$1<<fixnumshift,%nargs)
248        ],[
249        __(xorl %nargs,%nargs)
250        ifelse(eval($1),0,[],[
251        __(addl [$]$1<<fixnumshift,%nargs)
252        ])])])
253       
254
255
256/* $1 = ndigits.  Assumes 4-byte digits           */
/* Expands to an expression for node_size plus 4*$1, rounded up to a  */
/* multiple of dnode_size.  */
257define([aligned_bignum_size],[((~(dnode_size-1)&(node_size+(dnode_size-1)+(4*$1))))])
258       
259
/* $1 = register holding a cons, $2 = dest: load the car.  */
260define([_car],[
261        __(movq cons.car($1),$2)
262])     
263
/* $1 = register holding a cons, $2 = new value: store $2 as the car.  */
264define([_rplaca],[
265        __(movq $2,cons.car($1))
266])     
267               
/* $1 = register holding a cons, $2 = dest: load the cdr.  */
268define([_cdr],[
269        __(movq cons.cdr($1),$2)
270])
271
/* $1 = register holding a cons, $2 = new value: store $2 as the cdr.  */
272define([_rplacd],[
273        __(movq $2,cons.cdr($1))
274])     
275               
276       
277       
/* Emit an 8-byte alignment directive, a .long, and then the label $1.  */
/* The .long holds $1-$2 when $2 is supplied and 0 otherwise.  */
/* NOTE(review): presumably marks a tagged return address, with the .long  */
/* recording its offset from the start of the function - confirm.  */
278define([tra],[
279        .p2align 3
280        ifelse($2,[],[
281        .long 0
282        ],[
283        .long $1-$2
284        ])
285$1:     
286])
287                               
/* Jump to the callable object in %temp0.  The fulltag of %temp0 is  */
/* compared (signed) with fulltag_symbol.  A greater tag means %temp0  */
/* itself is copied to %fn; an equal tag means %fn is loaded from the  */
/* symbol's function cell; a smaller tag falls into the not-callable  */
/* error.  Clobbers the low byte of %imm0, and %fn.  */
288define([do_funcall],[
289        new_macro_labels()
290        __(movb %temp0_b,%imm0_b)
291        __(andb $fulltagmask,%imm0_b)
292        __(cmpb $fulltag_symbol,%imm0_b)
293        /* %fname == %temp0   */
294        __(cmovgq %temp0,%fn)
295        __(jl macro_label(bad))
296        __(cmoveq symbol.fcell(%fname),%fn)
297        __(jmp *%fn)
298macro_label(bad):               
299        __(uuo_error_not_callable)
300])
301
/* $1 = vector, $2 = dest: load the header word found at  */
/* misc_header_offset from the vector.  */
302define([getvheader],[
303        __(movq misc_header_offset($1),$2)
304])
305
306/* "Size" is unboxed element-count.  $1 (header) and $2 (dest) should  */
307/*    both be immediate registers   */
/* Copies $1 to $2 and logically shifts $2 right by num_subtag_bits.  */
308define([header_size],[
309        __(movq $1,$2)
310        __(shr $num_subtag_bits,$2)
311])
312
313/* $2 (length) is fixnum element-count.   */
/* $1 = header.  The low 8 bits of the header are masked off and the rest  */
/* is shifted right by num_subtag_bits-fixnumshift, which leaves the count  */
/* already shifted left by fixnumshift.  NOTE(review): the mask ~255  */
/* presumably assumes num_subtag_bits is 8 - confirm.  */
314define([header_length],[
315        __(movq $~255,$2)
316        __(andq $1,$2)
317        __(shr $num_subtag_bits-fixnumshift,$2)
318])
319
320/* $1 = vector, $2 = header, $3 = dest   */
/* Loads the vector's header into $2, then leaves the unboxed element  */
/* count in $3.  */
321define([vector_size],[                                 
322        getvheader($1,$2)
323        header_size($2,$3)
324])
325
326/* $1 = vector, $2 = dest   */
/* Leaves the element count, shifted left by fixnumshift, in $2.  The  */
/* header is read straight from memory by the andq, so no separate header  */
/* register is needed.  */
327define([vector_length],[                                 
328        __(movq $~255,$2)
329        __(andq misc_header_offset($1),$2)
330        __(shr $num_subtag_bits-fixnumshift,$2)
331])
332               
333/* GAS/ATT comparison arg order drives me nuts   */
/* The four reversed-compare macros emit cmp with their operands swapped,  */
/* so a following conditional branch reads as "$1 compared with $2".  */
/* This one is the 64-bit form.  */
334define([rcmpq],[
335        __(cmpq $2,$1)
336])
337
/* 32-bit reversed compare: emits cmpl with the operands swapped.  */
338define([rcmpl],[
339        __(cmpl $2,$1)
340])     
341
/* 16-bit reversed compare: emits cmpw with the operands swapped.  */
342define([rcmpw],[
343        __(cmpw $2,$1)
344])     
345
/* 8-bit reversed compare: emits cmpb with the operands swapped.  */
346define([rcmpb],[
347        __(cmpb $2,$1)
348])             
349
350
/* $1 = condition-code suffix, $2 = scratch register, $3 = dest register.  */
/* Both registers are given by their base name; the _l forms are derived  */
/* here.  $2 is loaded with t_value and $3 with t_value-t_offset; cmov then  */
/* replaces $3 with t_value when condition $1 holds.  The flags must be set  */
/* beforehand; movl and lea leave them intact.  NOTE(review):  */
/* t_value-t_offset is presumably the address of nil - confirm.  */
351define([condition_to_boolean],[
352        __(movl [$]t_value,$2_l)
353        __(lea (-t_offset)($2),$3)
354        __(cmov$1l $2_l,$3_l)
355])
356
/* Compare the low byte of register $1 (given by base name) with  */
/* fulltag_nil.  Only the flags are affected.  */
357define([compare_reg_to_nil],[
358        __(cmpb $fulltag_nil,$1_b)
359])             
360       
/* $1 = source register, $2 = dest register, both given by base name.  */
/* Zero-extends the low byte of $1 into $2 and masks it with tagmask.  */
361define([extract_lisptag],[
362        __(movzbl $1_b,$2_l)
363        __(andb [$]tagmask,$2_b)
364])
365
366                                                               
/* $1 = source register, $2 = dest register, both given by base name.  */
/* Zero-extends the low byte of $1 into $2 and masks it with fulltagmask.  */
367define([extract_fulltag],[
368        __(movzbl $1_b,$2_l)
369        __(andb [$]fulltagmask,$2_b)
370])
371
/* $1 = register holding a tagged object, $2 = dest.  Loads the byte at  */
/* misc_subtag_offset from the object.  $2 is used exactly as given, with  */
/* no size suffix appended, so the caller must name a byte register.  */
372define([extract_subtag],[
373        __(movb misc_subtag_offset($1),$2)
374])
375
/* $1 = source register, $2 = dest register, both given by base name.  */
/* $2 receives the low byte of $1 masked with tagmask.  When that tag  */
/* equals tag_misc, the low byte of $2 is replaced with the subtag byte  */
/* read from the object itself.  */
376define([extract_typecode],[
377        new_macro_labels()
378        __(movzbl $1_b,$2_l)
379        __(andb $tagmask,$2_b)
380        __(cmpb $tag_misc,$2_b)
381        __(jne macro_label(done))
382        __(movb misc_subtag_offset($1),$2_b)
383macro_label(done):     
384])
385
386/* dnode_align(src,delta,dest)  */
/* dest = src+delta rounded up to a dnode boundary: lea adds  */
/* delta+(dnode_size-1) to src, then a byte-wide and clears the low bits.  */
/* dest is given by base name; the _b form is derived here.  */
387
388        define([dnode_align],[
389        __(lea ($2+(dnode_size-1))($1),$3)
390        __(andb $~(dnode_size-1),$3_b)
391])
392       
/* Push the argument registers that %nargs says are in use.  Nothing is  */
/* pushed when %nargs is zero; only %arg_z when it is below 2*node_size  */
/* (unsigned); %arg_y then %arg_z when it equals 2*node_size; and %arg_x,  */
/* %arg_y, %arg_z when it is larger.  Modifies the flags.  */
393define([push_argregs],[
394        new_macro_labels()
395        __(testl %nargs,%nargs)
396        __(jz macro_label(done))
397        __(cmpl [$]2*node_size,%nargs)
398        __(je macro_label(yz))
399        __(jb macro_label(z))
400        __(push %arg_x)
401macro_label(yz):
402        __(push %arg_y)
403macro_label(z):
404        __(push %arg_z)
405macro_label(done):
406])     
407
408
409/* $1 = ndigits.  Assumes 4-byte digits           */
/* NOTE(review): this repeats, character for character, a definition of  */
/* the same macro that appears earlier in this file; the redefinition is  */
/* redundant.  */
410define([aligned_bignum_size],[((~(dnode_size-1)&(node_size+(dnode_size-1)+(4*$1))))])
411
/* $1 = scratch register.  Pops the newest tstack frame: loads  */
/* tcr.save_tsp, follows the link stored at the base of that frame, and  */
/* writes the result to both tcr.save_tsp and tcr.next_tsp.  */
412define([discard_temp_frame],[
413        __(movq rcontext(tcr.save_tsp),$1)
414        __(movq ($1),$1)
415        __(movq $1,rcontext(tcr.save_tsp))
416        __(movq $1,rcontext(tcr.next_tsp))
417
418])     
419
/* $1 = label to branch to when no interrupt is pending.  btrq tests and  */
/* clears bit 63 of tcr.interrupt_pending; if the bit was clear the branch  */
/* to $1 is taken (hinted as the likely case), otherwise execution falls  */
/* into the interrupt trap.  */
420define([check_pending_enabled_interrupt],[
421        __(btrq [$]63,rcontext(tcr.interrupt_pending))
422        __(jnc,pt $1)
423        interrupt_now()
424])
425       
426/* $1 = scratch register, used to access tcr.tlb_pointer.  An interrupt  */
427/*   should be taken if interrupts are enabled and the most significant  */
428/*   bit of tcr.interrupt_pending is set.  If we take the interrupt, we  */
429/*   test and clear the pending bit.  */
/* The pending-bit test is skipped when the word at  */
/* INTERRUPT_LEVEL_BINDING_INDEX in the tlb compares as negative against  */
/* zero.  */
430
431define([check_pending_interrupt],[
432        new_macro_labels()
433        __(movq rcontext(tcr.tlb_pointer),$1)
434        __(cmpq [$]0,INTERRUPT_LEVEL_BINDING_INDEX($1))
435        __(js,pt macro_label(done))
436        check_pending_enabled_interrupt(macro_label(done))
437macro_label(done):
438])
439
440/* This should only be called from a foreign context; it should be */
441/* assumed to bash all non-volatile C registers.  And of course it's */
442/* ugly, awful, non-portable, and slow.  %rdi should point to the */
443/* linear address that %gs should be made to address (tcr or pthread data) */
/* These three macros exist only when DARWIN_GS_HACK is defined.  The  */
/* first takes an optional $1 that is copied to %rdi, then loads  */
/* 0x3000003 into %eax and executes syscall.  NOTE(review): that constant  */
/* is presumably the Darwin machine-dependent call that sets the %gs base -  */
/* confirm.  */
444                               
445ifdef([DARWIN_GS_HACK],[
446define([set_gs_base],[
447        ifelse($1,[],[
448        ],[
449        __(movq $1,%rdi)
450        ])
451        __(movl [$]0x3000003,%eax)
452        __(syscall)
453])
454
455/* %gs addresses the tcr.  Make it address pthread data before running */
456/* foreign code */       
457       
458define([set_foreign_gs_base],[
459        set_gs_base([rcontext(tcr.osid)])
460])
461
462/* %gs addresses the tcr.  Get the linear address of the tcr and */
463/* copy it to $1 */
464
465define([save_tcr_linear],[
466        __(movq rcontext(tcr.linear),$1)
467]) 
468       
469])
470
471/*  On AMD hardware (at least), a one-byte RET instruction should be */
472/*  prefixed with a REP prefix if it (a) is the target of a  */
473/*  branch or (b) immediately follows a conditional branch not taken. */
/*  The prefix is emitted as a raw 0xf3 byte, followed by a plain ret.  */
474define([repret],[
475        __(.byte 0xf3)
476        __(ret)
477])
478                               
479       
Note: See TracBrowser for help on using the repository browser.