Changeset 13827
- Timestamp:
- Jun 14, 2010, 10:20:11 AM (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/arm/lisp-kernel/arm-spentry.s
r13811 r13827 496 496 /* funcall nfn, returning multiple values if it does. */ 497 497 _spentry(mvpass) 498 __(subs imm0,nargs,#node_size*nargregs) 499 __(movge imm0,#0) 500 __(add imm0,vsp,imm0) 501 __(build_lisp_frame(temp1,imm0)) 498 __(cmp nargs,#node_size*nargregs) 499 __(mov imm1,vsp) 500 __(subgt imm1,imm1,#node_size*nargregs) 501 __(addgt imm1,imm1,nargs) 502 __(build_lisp_frame(imm0,imm1)) 502 503 __(adr lr,C(ret1valn)) 503 504 __(mov fn,#0) … … 884 885 _spentry(set_hash_key) 885 886 C(egc_set_hash_key): 886 dnl __(cmplr(cr2,arg_z,arg_x)) 887 dnl __(la imm0,misc_data_offset(arg_y)) 888 dnl __(str arg_z,arg_x,imm0) 889 dnl __(blelr cr2) 890 dnl __(add imm0,imm0,arg_x) 891 dnl __(ref_global(imm2,ref_base)) 892 dnl __(load_highbit(imm3)) 893 dnl __(ref_global(imm1,oldspace_dnode_count)) 894 dnl __(sub imm0,imm0,imm2) 895 dnl __(srri(imm0,imm0,dnode_shift)) 896 dnl __(cmplr(imm0,imm1)) 897 dnl __(extract_bit_shift_count(imm4,imm0)) 898 dnl __(srri(imm0,imm0,bitmap_shift)) 899 dnl __(srr(imm3,imm3,imm4)) 900 dnl __(ref_global(imm2,refbits)) 901 dnl __(bgelr) 902 dnl __(slri(imm0,imm0,word_shift)) 903 dnl __(ldrx(imm1,imm2,imm0)) 904 dnl __(and. imm1,imm1,imm3) 905 dnl __(bne 2f) 906 dnl 1: __(lrarx(imm1,imm2,imm0)) 907 dnl __(or imm1,imm1,imm3) 908 dnl __(strcx(imm1,imm2,imm0)) 909 dnl __(bne- 1b) 910 dnl __(isync) 911 dnl 2: 912 dnl __(ref_global(imm1,ref_base)) 913 dnl __(sub imm0,arg_x,imm1) 914 dnl __(srri(imm0,imm0,dnode_shift)) 915 dnl __(load_highbit(imm3)) 916 dnl __(extract_bit_shift_count(imm4,imm0)) 917 dnl __(srri(imm0,imm0,bitmap_shift)) 918 dnl __(srr(imm3,imm3,imm4)) 919 dnl __(slri(imm0,imm0,word_shift)) 920 dnl __(ldrx(imm1,imm2,imm0)) 921 dnl __(and. 
imm1,imm1,imm3) 922 dnl __(bnelr) 923 dnl 3: __(lrarx(imm1,imm2,imm0)) 924 dnl __(or imm1,imm1,imm3) 925 dnl __(strcx(imm1,imm2,imm0)) 926 dnl __(bne- 3b) 927 dnl __(isync) 928 dnl __(bx lr) 929 887 __(cmp arg_z,arg_x) 888 __(add imm0,arg_y,#misc_data_offset) 889 __(str arg_z,[arg_x,imm0]) 890 __(bxhs lr) 891 __(add imm0,imm0,arg_x) 892 __(ref_global(temp0,ref_base)) 893 __(sub imm0,imm0,temp0) 894 __(mov imm0,imm0,lsr #dnode_shift) 895 __(ref_global(imm1,oldspace_dnode_count)) 896 __(cmp imm0,imm1) 897 __(bxhs lr) 898 __(and imm2,imm0,#31) 899 __(mov imm1,#0x80000000) 900 __(mov imm1,imm1,lsr imm2) 901 __(mov imm0,imm0,lsr #bitmap_shift) 902 __(ref_global(temp0,refbits)) 903 __(add temp0,temp0,imm0,lsl #word_shift) 904 0: __(ldrex imm2,[temp0]) 905 __(orr imm2,imm2,imm1) 906 __(strex imm0,imm2,[temp0]) 907 __(cmp imm0,#0) 908 __(bne 0b) 909 /* Now need to ensure that the hash table itself is in the refmap; we 910 know that it's in bounds, etc. */ 911 __(ref_global(temp0,ref_base)) 912 __(sub imm0,arg_x,temp0) 913 __(mov imm0,imm0,lsr #dnode_shift) 914 __(and imm2,imm0,#31) 915 __(mov imm1,#0x80000000) 916 __(mov imm1,imm1,lsr imm2) 917 __(mov imm0,imm0,lsr #bitmap_shift) 918 __(ref_global(temp0,refbits)) 919 __(add temp0,temp0,imm0,lsl #word_shift) 920 1: __(ldrex imm2,[temp0]) 921 __(orr imm2,imm2,imm1) 922 __(strex imm0,imm2,[temp0]) 923 __(cmp imm0,#0) 924 __(bne 1b) 925 __(bx lr) 926 930 927 931 928 /* … … 964 961 __(bne 1b) 965 962 __(cmp arg_z,arg_x) 966 __(b lo4f)963 __(bhi 4f) 967 964 968 965 __(ref_global(imm0,ref_base)) … … 981 978 __(orr imm2,imm2,imm1) 982 979 __(strex imm0,imm2,[temp0]) 980 .globl C(egc_set_hash_key_conditional_test) 981 C(egc_set_hash_key_conditional_test): 983 982 __(cmp imm0,#0) 984 983 __(bne 2b) … … 988 987 vsp`0' = (boxed) byte-offset 989 988 Interrupt-related issues are as in store_node_conditional, but 990 lwe have to do more work to actually do the memoization.*/989 we have to do more work to actually do the memoization.*/ 
991 990 _spentry(set_hash_key_conditional) 992 991 .globl C(egc_set_hash_key_conditional) 993 992 C(egc_set_hash_key_conditional): 994 dnl __(cmplr(cr2,arg_z,arg_x)) 995 dnl __(vpop(imm4)) 996 dnl __(unbox_fixnum(imm4,imm4)) 997 dnl 1: __(lrarx(temp1,arg_x,imm4)) 998 dnl __(cmpr(cr1,temp1,arg_y)) 999 dnl __(bne cr1,5f) 1000 dnl __(strcx(arg_z,arg_x,imm4)) 1001 .globl C(egc_set_hash_key_conditional_test) 1002 C(egc_set_hash_key_conditional_test): 1003 dnl __(bne 1b) 1004 dnl __(isync) 1005 dnl __(add imm0,imm4,arg_x) 1006 dnl __(ref_global(imm2,ref_base)) 1007 dnl __(ref_global(imm1,oldspace_dnode_count)) 1008 dnl __(sub imm0,imm0,imm2) 1009 dnl __(load_highbit(imm3)) 1010 dnl __(srri(imm0,imm0,dnode_shift)) 1011 dnl __(cmplr(imm0,imm1)) 1012 dnl __(extract_bit_shift_count(imm2,imm0)) 1013 dnl __(srri(imm0,imm0,bitmap_shift)) 1014 dnl __(srr(imm3,imm3,imm2)) 1015 dnl __(ref_global(imm2,refbits)) 1016 dnl __(bge 4f) 1017 dnl __(slri(imm0,imm0,word_shift)) 1018 dnl 2: __(lrarx(imm1,imm2,imm0)) 1019 dnl __(or imm1,imm1,imm3) 1020 dnl __(strcx(imm1,imm2,imm0)) 1021 dnl __(bne- 2b) 1022 dnl __(isync) 1023 dnl /* Memoize hash table header */ 1024 dnl __(ref_global(imm1,ref_base)) 1025 dnl __(sub imm0,arg_x,imm1) 1026 dnl __(srri(imm0,imm0,dnode_shift)) 1027 dnl __(load_highbit(imm3)) 1028 dnl __(extract_bit_shift_count(imm4,imm0)) 1029 dnl __(srri(imm0,imm0,bitmap_shift)) 1030 dnl __(srr(imm3,imm3,imm4)) 1031 dnl __(slri(imm0,imm0,word_shift)) 1032 dnl __(ldrx(imm1,imm2,imm0)) 1033 dnl __(and. 
imm1,imm1,imm3) 1034 dnl __(bne 4f) 1035 dnl 3: __(lrarx(imm1,imm2,imm0)) 1036 dnl __(or imm1,imm1,imm3) 1037 dnl __(strcx(imm1,imm2,imm0)) 1038 dnl __(bne- 3b) 1039 dnl __(isync) 993 __(vpop1(imm1)) 994 __(unbox_fixnum(imm1,imm1)) 995 0: __(add imm2,arg_x,imm1) 996 __(ldrex temp1,[imm2]) 997 __(cmp temp1,arg_y) 998 __(bne 5f) 999 __(strex imm0,arg_z,[imm2]) 1000 __(bne 0b) 1001 __(cmp arg_z,arg_x) 1002 __(bhi 4f) 1003 __(ref_global(temp0,ref_base)) 1004 __(sub imm0,imm2,temp0) 1005 __(mov imm0,imm0,lsr #dnode_shift) 1006 __(ref_global(imm1,oldspace_dnode_count)) 1007 __(cmp imm0,imm1) 1008 __(bhs 4f) 1009 __(and imm2,imm0,#31) 1010 __(mov imm1,#0x80000000) 1011 __(mov imm1,imm1,lsr imm2) 1012 __(mov imm0,imm0,lsr #bitmap_shift) 1013 __(ref_global(temp0,refbits)) 1014 __(add temp0,temp0,imm0,lsl #word_shift) 1015 1: __(ldrex imm2,[temp0]) 1016 __(orr imm2,imm2,imm1) 1017 __(strex imm0,imm2,[temp0]) 1018 __(cmp imm0,#0) 1019 __(bne 1b) 1020 /* Now need to ensure that the hash table itself is in the refmap; we 1021 know that it's in bounds, etc. 
*/ 1022 __(ref_global(temp0,ref_base)) 1023 __(sub imm0,arg_x,temp0) 1024 __(mov imm0,imm0,lsr #dnode_shift) 1025 __(and imm2,imm0,#31) 1026 __(mov imm1,#0x80000000) 1027 __(mov imm1,imm1,lsr imm2) 1028 __(mov imm0,imm0,lsr #bitmap_shift) 1029 __(ref_global(temp0,refbits)) 1030 __(add temp0,temp0,imm0,lsl #word_shift) 1031 1: __(ldrex imm2,[temp0]) 1032 __(orr imm2,imm2,imm1) 1033 __(strex imm0,imm2,[temp0]) 1034 __(cmp imm0,#0) 1035 __(bne 1b) 1040 1036 C(egc_write_barrier_end): 1041 1037 4: __(mov arg_z,#nil_value) … … 1922 1918 1923 1919 1924 1925 _spentry(unused0) 1926 1920 /* This doesn't need to memoize anything, but needs pc-lusering support 1921 because of the locative */ 1922 _spentry(atomic_incf_node) 1923 __(unbox_fixnum(imm0,arg_z)) 1924 0: __(add imm2,arg_y,imm0) 1925 __(ldrex arg_z,[imm2]) 1926 __(add arg_z,arg_z,arg_x) 1927 __(strex imm0,arg_z,[imm2]) 1928 __(cmp imm0,#0) 1929 __(bne 0b) 1930 __(bx lr) 1931 1927 1932 _spentry(unused1) 1928 1933 … … 2211 2216 2212 2217 2213 dnl 2214 dnl 2215 dnl /* Nargs is valid; all arg regs, lexpr-count pushed by caller. */ 2216 dnl /* imm0 = vsp to restore. */ 2217 dnl /* Return all values returned by caller to its caller, hiding */ 2218 dnl /* the variable-length arglist. */ 2219 dnl /* If we can detect that the caller's caller didn't expect */ 2220 dnl /* multiple values, then things are even simpler. */ 2221 _spentry(lexpr_entry) 2222 dnl __(ref_global(imm1,ret1valn)) 2223 dnl __(cmpr(imm1,loc_pc)) 2224 dnl __(build_lisp_frame(fn,loc_pc,imm0)) 2225 dnl __(bne 1f) 2226 dnl __(ref_global(imm0,lexpr_return)) 2227 dnl __(build_lisp_frame(rzero,imm0,vsp)) 2228 dnl __(mov loc_pc,imm1) 2229 dnl __(ldr imm0,[rcontext,#tcr.cs_limit]) 2230 dnl __(trllt(sp,imm0)) 2231 dnl __(mov fn,#0) 2232 dnl __(bx lr) 2233 dnl 2234 dnl /* The single-value case just needs to return to something that'll pop */ 2235 dnl /* the variable-length frame off of the vstack.
*/ 2236 dnl 1: 2237 dnl __(ref_global(loc_pc,lexpr_return1v)) 2238 dnl __(ldr imm0,[rcontext,#tcr.cs_limit]) 2239 dnl __(trllt(sp,imm0)) 2240 dnl __(mov fn,#0) 2241 dnl __(bx lr) 2242 2218 /* Divide the 64 bit unsigned integer in imm0 (low) and imm1 (high) by 2219 the 32-bit unsigned integer in imm2; return the quotient in 2220 imm0:imm1 and remainder in imm2. We pretty much have to do this 2221 as an ff call; even if we wrote the code ourselves, we'd have to 2222 enter foreign context to use as many imm regs as we'd need. 2223 Moral: don't do integer division on the ARM. 2224 */ 2225 .globl C(__aeabi_uldivmod) 2226 _spentry(udiv64by32) 2227 __(cmp imm2,#0) 2228 __(moveq arg_z,#XDIVZRO) 2229 __(moveq nargs,#1<<fixnumshift) 2230 __(beq _SPksignalerr) 2231 __(stmdb vsp!,{arg_z,arg_y,arg_x,temp0,temp1,temp2}) 2232 __(str vsp,[rcontext,#tcr.save_vsp]) 2233 __(mov arg_z,rcontext) 2234 __(ldr arg_y,[rcontext,#tcr.last_lisp_frame]) 2235 __(build_lisp_frame(r3)) 2236 __(str sp,[arg_z,#tcr.last_lisp_frame]) 2237 __(str allocptr,[arg_z,#tcr.save_allocptr]) 2238 __(mov r3,#TCR_STATE_FOREIGN) 2239 __(str r3,[arg_z,#tcr.valence]) 2240 __(mov r3,#0) 2241 __(bl C(__aeabi_uldivmod)) 2242 __(mov rcontext,arg_z) 2243 __(str arg_y,[rcontext,#tcr.last_lisp_frame]) 2244 __(mov allocptr,#VOID_ALLOCPTR) 2245 __(mov fn,#0) 2246 __(mov temp2,#0) 2247 __(mov temp1,#0) 2248 __(mov temp0,#0) 2249 __(mov arg_x,#TCR_STATE_LISP) 2250 __(str arg_x,[rcontext,#tcr.valence]) 2251 __(ldr allocptr,[rcontext,#tcr.save_allocptr]) 2252 __(ldm vsp!,{arg_z,arg_y,arg_x,temp0,temp1,temp2}) 2253 __(ldr fn,[sp,#lisp_frame.savefn]) 2254 __(ldr lr,[sp,#lisp_frame.savelr]) 2255 __(discard_lisp_frame()) 2256 __(bx lr) 2243 2257 2244 2258 … … 2398 2412 _spentry(mvpasssym) 2399 2413 __(cmp nargs,#node_size*nargregs) 2400 __(mov nfn,vsp)2401 __(subgt nfn,nfn,#node_size*nargregs)2402 __(addgt nfn,nfn,nargs)2403 __(build_lisp_frame(imm0,nfn))2414 __(mov imm1,vsp) 2415 __(subgt imm1,imm1,#node_size*nargregs) 2416 
__(addgt imm1,imm1,nargs) 2417 __(build_lisp_frame(imm0,imm1)) 2404 2418 __(ref_global(lr,ret1val_addr,imm0)) 2405 2419 __(mov fn,#0) … … 2850 2864 __(b local_label(error_exit)) 2851 2865 2852 /* Most ARMs don't have hardware integer division. This algorithm's 2853 from Sloss, Symes, & Wright. On entry: imm0 = numerator, imm1 = denominator; 2854 on exit, imm0 = quotient, imm1 = remainder, imm2 clobbered. Check for /0 2855 here, so that callers don't have to. 2856 */ 2866 .globl C(__aeabi_uidivmod) 2857 2867 _spentry(udiv32) 2858 __(cmp imm 0,#0)2868 __(cmp imm1,#0) 2859 2869 __(moveq arg_z,#XDIVZRO) 2860 2870 __(moveq nargs,#1<<fixnumshift) 2861 2871 __(beq _SPksignalerr) 2862 __(ldr imm2,[rcontext,#tcr.flags]) 2863 __(orr imm2,imm2,#(1<<TCR_FLAG_BIT_ALLOCPTR_FOREIGN)) 2864 __(str imm2,[rcontext,#tcr.flags]) 2865 __(vpush1(rcontext)) 2866 /* Hopefully safe now to use r3 (rcontext) and r12 (allocptr) 2867 as imm regs. */ 2868 pushdef(`q',`r0') 2869 pushdef(`r',`r1') 2870 pushdef(`s',`r2') 2871 pushdef(`m',`r3') 2872 pushdef(`a',`r12') 2873 __(clz s,q) 2874 __(movs a,q,lsl s) 2875 __(add a,pc,a,lsr #25) 2876 __(ldrbeq a,[a,#local_label(t32)-local_label(b32)-64]) 2877 local_label(b32): 2878 __(subs s,s,#7) 2879 __(rsb m,q,#0) 2880 __(movpl q,a,lsl s) 2881 __(mulpl a,q,m) 2882 __(bmi local_label(udiv_by_large_d)) 2883 __(smlawt q,q,a,q) 2884 __(teq m,m,asr #1) 2885 __(mulne a,q,m) 2886 __(movne s,#0) 2887 __(smlalne s,q,a,q) 2888 __(beq local_label(udiv_by_1)) 2889 __(umull s,q,r,q) 2890 __(add r,r,m) 2891 __(mla r,q,m,r) 2892 __(cmn r,m) 2893 __(subcs r,r,m) 2894 __(addcc q,q,#1) 2895 __(addpl r,r,m,lsl #1) 2896 __(addpl q,q,#2) 2897 __(b local_label(done)) 2898 local_label(udiv_by_large_d): 2899 __(sub a,a,#4) 2900 __(rsb s,s,#0) 2901 __(mov q,a,lsr s) 2902 __(umull s,q,r,q) 2903 __(mla r,q,m,r) 2904 __(cmn m,r,lsr #1) 2905 __(addcs r,r,m,lsl #1) 2906 __(addcs q,q,#2) 2907 __(cmn m,r) 2908 __(addcs q,q,#1) 2909 __(b local_label(done)) 2910 local_label(udiv_by_1): 
2911 __(mov q,r) 2912 __(mov r,#0) 2913 local_label(done): 2914 __(mov allocptr,#-8) 2915 __(vpop1(rcontext)) 2916 __(ldr imm2,[rcontext,tcr.flags]) 2917 __(bic imm2,imm2,#(1<<TCR_FLAG_BIT_ALLOCPTR_FOREIGN)) 2918 __(str imm2,[rcontext,tcr.flags]) 2919 __(ldr allocptr,[rcontext,tcr.save_allocptr]) 2920 __(bx lr) 2921 popdef(`s') 2922 popdef(`m') 2923 popdef(`a') 2924 popdef(`q') 2925 popdef(`r') 2926 local_label(t32): 2927 .byte 0xff,0xfc,0xf8,0xf4,0xf0,0xed,0xea,0xe6 2928 .byte 0xe3,0xe0,0xdd,0xda,0xd7,0xd4,0xd2,0xcf 2929 .byte 0xcc,0xca,0xc7,0xc5,0xc3,0xc0,0xbe,0xbc 2930 .byte 0xba,0xb8,0xb6,0xb4,0xb2,0xb0,0xae,0xac 2931 .byte 0xaa,0xa8,0xa7,0xa5,0xa3,0xa2,0xa0,0x9f 2932 .byte 0x9d,0x9c,0x9a,0x99,0x97,0x96,0x94,0x93 2933 .byte 0x92,0x90,0x8f,0x8e,0x8d,0x8c,0x8a,0x89 2934 .byte 0x88,0x87,0x86,0x85,0x84,0x83,0x82,0x81 2872 __(stmdb vsp!,{arg_z,arg_y,arg_x,temp0,temp1,temp2}) 2873 __(str vsp,[rcontext,#tcr.save_vsp]) 2874 __(mov arg_z,rcontext) 2875 __(ldr arg_y,[rcontext,#tcr.last_lisp_frame]) 2876 __(build_lisp_frame(r3)) 2877 __(str sp,[arg_z,#tcr.last_lisp_frame]) 2878 __(str allocptr,[arg_z,#tcr.save_allocptr]) 2879 __(mov r3,#TCR_STATE_FOREIGN) 2880 __(str r3,[arg_z,#tcr.valence]) 2881 __(mov r3,#0) 2882 __(bl C(__aeabi_uidivmod)) 2883 __(mov rcontext,arg_z) 2884 __(str arg_y,[rcontext,#tcr.last_lisp_frame]) 2885 __(mov allocptr,#VOID_ALLOCPTR) 2886 __(mov fn,#0) 2887 __(mov temp2,#0) 2888 __(mov temp1,#0) 2889 __(mov temp0,#0) 2890 __(mov arg_x,#TCR_STATE_LISP) 2891 __(str arg_x,[rcontext,#tcr.valence]) 2892 __(ldr allocptr,[rcontext,#tcr.save_allocptr]) 2893 __(ldm vsp!,{arg_z,arg_y,arg_x,temp0,temp1,temp2}) 2894 __(ldr fn,[sp,#lisp_frame.savefn]) 2895 __(ldr lr,[sp,#lisp_frame.savelr]) 2896 __(discard_lisp_frame()) 2897 __(bx lr) 2935 2898 2936 2899 _spentry(sdiv32) … … 2957 2920 __(ldr arg_y,[rcontext,#tcr.last_lisp_frame]) 2958 2921 __(stmdb vsp!,{arg_y,arg_x,temp0,temp1,temp2}) 2922 __(str vsp,[rcontext,#tcr.save_vsp]) 2959 2923 /* There's a u32 vector on 
top of the stack ; its first data word points 2960 2924 to the previous stack object. The 4 words at the bottom of the vector
Note: See TracChangeset
for help on using the changeset viewer.