Index: /branches/ia32/level-0/X86/X8632/x8632-bignum.lisp
===================================================================
--- /branches/ia32/level-0/X86/X8632/x8632-bignum.lisp	(revision 7962)
+++ /branches/ia32/level-0/X86/X8632/x8632-bignum.lisp	(revision 7963)
@@ -6,4 +6,5 @@
 ;;; The easiest thing to do is to store the 32 raw bits in two fixnums
 ;;; and return multiple values.
+;;;
 ;;; XXX -- figure out how we're going to handle multiple-values...
 (defx8632lapfunction %bignum-ref ((bignum arg_y) (i arg_z))
@@ -93,67 +94,182 @@
 (defx8632lapfunction %add-with-carry ((r 20) (k 16) (c 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
   (mark-as-imm temp0)
-  (unbox-fixnum b imm0)			;assume that j is going to be nil
-  (cmpl ($ x8632::nil-value) (% j))	;is j in fact nil?
-  (jne @got-b)
-  (movl (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
-  @got-b
+  (unbox-fixnum b imm0)
+  (cmpl ($ x8632::nil-value) (% j))
+  ;; if j not nil, get b[j]
+  (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
   (movl (@ a (% esp)) (% arg_y))
-  (unbox-fixnum arg_y temp0)		;assume that i is going be nil
+  (unbox-fixnum arg_y temp0)
   (movl (@ i (% esp)) (% arg_z))
-  (cmpl ($ x8632::nil-value) (% arg_z))	;is i in fact nil?
-  (jne @got-a)
-  (movl (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
-  @got-a
-  ;; unboxed a or a[i] now in temp0
-  ;; unboxed b or b[j] now in imm0
+  (cmpl ($ x8632::nil-value) (% arg_z))
+  ;; if i not nil, get a[i]
+  (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
   (movl ($ '1) (% arg_z))		;for outgoing carry
-  (movl (@ c (% esp)) (% arg_y))
-  (testl (% arg_y) (% arg_y))		;clears carry flag
-  (jz @add)
   (xorl (% arg_y) (% arg_y))
-  (stc)
-  @add
-  ;; arg_y = 0, arg_z = fixnum 1
+  ;; I can't think of a better way to set CF at the moment.
+  ;; NEG would be ideal, but we don't have a free imm reg.
+  (btl ($ x8632::fixnumshift) (@ c (% esp))) ;CF = lsb of carry fixnum 
   (adc (% temp0) (% imm0))
+  (mark-as-node temp0)
   (movl (@ r (% esp)) (% temp0))
   (movl (@ k (% esp)) (% temp1))
   (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
-  (cmovnc (% arg_y) (% arg_z))		;zero outgoing carry if no carry
-  (mark-as-node temp0)
+  (cmovnc (% arg_y) (% arg_z))		;zero outgoing carry if CF = 0
   (single-value-return 7))
+
+;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum).
+;;; Store the result in R[K], and return the outgoing carry.  If I is
+;;; NIL, A is a fixnum.  If J is NIL, B is a fixnum.
+#+sse2
+(defx8632lapfunction %add-with-carry ((r 20) (k 16) (c 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
+  (let ((aa mm2)
+	(bb mm3)
+	(cc mm4))
+    (unbox-fixnum b imm0)		;assume j will be nil
+    (cmpl ($ x8632::nil-value) (% j))
+    ;; if j not nil, get b[j]
+    (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
+    (movd (% imm0) (% bb))
+    (movl (@ a (% esp)) (% arg_y))
+    (movl (@ i (% esp)) (% arg_z))
+    (movl (@ c (% esp)) (% temp0))
+    (unbox-fixnum arg_y imm0)		;assume i will be nil
+    (cmpl ($ x8632::nil-value) (% arg_z))
+    ;; if i not nil, get a[i]
+    (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% imm0))
+    (movd (% imm0) (% aa))
+    (unbox-fixnum temp0 imm0)
+    (movd (% imm0) (% cc))
+    (paddq (% xx) (% yy))
+    (paddq (% cc) (% yy))
+    (movl (@ r (% esp)) (% temp0))
+    (movl (@ k (% esp)) (% temp1))
+    (movd (% yy) (@ x8632::misc-data-offset (% temp0) (% temp1)))
+    (psrlq ($ 32) (% yy))		;carry bit
+    (movd (% yy) (% imm0))
+    (box-fixnum imm0 arg_z)
+    (single-value-return 7)))
 
 ;;; Store the result of A[I] - B[J] - borrow into R[K], returning the borrow.
 ;;; If I is NIL, A is a fixnum; likewise for J and B.
+;;;
+;;; (a - b) - (1 - borrow), or equivalently, (a - b) + borrow - 1
+;;; 
+;;; Note: borrow is 1 for no borrow and 0 for a borrow.
 (defx8632lapfunction %subtract-with-borrow ((r 20) (k 16) (borrow 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
   (mark-as-imm temp0)
-  (unbox-fixnum b imm0)			;assume that j is going to be nil
-  (cmpl ($ x8632::nil-value) (% j))	;is j in fact nil?
-  (jne @got-b)
-  (movl (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
-  @got-b
+  (unbox-fixnum b imm0)
+  (cmpl ($ x8632::nil-value) (% j))
+  (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
   (movl (@ a (% esp)) (% arg_y))
-  (unbox-fixnum arg_y temp0)		;assume that i is going be nil
+  (unbox-fixnum arg_y temp0)
   (movl (@ i (% esp)) (% arg_z))
-  (cmpl ($ x8632::nil-value) (% arg_z))	;is i in fact nil?
-  (jne @got-a)
-  (movl (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
-  @got-a
-  ;; unboxed a or a[i] now in temp0
-  ;; unboxed b or b[j] now in imm0
-  (movl ($ '1) (% arg_z))		;for outgoing carry
-  (movl (@ borrow (% esp)) (% arg_y))
-  (testl (% arg_y) (% arg_y))		;clears carry flag
-  (jz @sub)
-  (xorl (% arg_y) (% arg_y))
-  (stc)
-  @sub
-  ;; arg_y = 0, arg_z = fixnum 1
+  (cmpl ($ x8632::nil-value) (% arg_z))
+  (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
+  ;; unboxed a or a[i] in temp0, unboxed b or b[j] in imm0
+  (cmpl ($ '1) (@ borrow (% esp)))	;CF = 1 if borrow is 0 else CF = 0
   (sbb (% imm0) (% temp0))
+  (movl ($ 1) (% imm0))
+  (sbb ($ 0) (% imm0))
+  (box-fixnum imm0 arg_z)
+  (movl (% temp0) (% imm0))
+  (mark-as-node temp0)
   (movl (@ r (% esp)) (% temp0))
   (movl (@ k (% esp)) (% temp1))
   (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
-  (cmovnc (% arg_y) (% arg_z))		;zero outgoing carry if no carry
-  (mark-as-node temp0)
   (single-value-return 7))
+
+#+sse2
+(defx8632lapfunction %subtract-with-borrow ((r 20) (k 16) (borrow 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
+  (let ((aa mm2)
+	(bb mm3)
+	(ww mm4))
+    (unbox-fixnum b imm0)
+    (cmpl ($ x8632::nil-value) (% j))
+    ;; if j not nil, get b[j]
+    (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
+    (movd (% imm0) (% bb))
+    (movl (@ a (% esp)) (% arg_y))
+    (movl (@ i (% esp)) (% arg_z))
+    (movl (@ borrow (% esp)) (% temp0))
+    (unbox-fixnum arg_y imm0)
+    (cmpl ($ x8632::nil-value) (% arg_z))
+    ;; if i not nil, get a[i]
+    (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% imm0))
+    (movd (% imm0) (% aa))
+    (unbox-fixnum temp0 imm0)
+    (subl ($ 1) (% imm0))
+    (movd (% imm0) (% ww))
+    (psubq (% bb) (% aa))
+    (paddq (% ww) (% aa))
+    (movl (@ r (% esp)) (% temp0))
+    (movl (@ k (% esp)) (% temp1))
+    (movd (% aa) (@ x8632::misc-data-offset (% temp0) (% temp1)))
+    (psrlq ($ 32) (% aa))		;carry digit
+    (movd (% aa) (% imm0))
+    (xorl (% arg_z) (% arg_z))
+    (test ($ 1) (% imm0))
+    (cmovzl ($ '1) (% arg_z))
+    (single-value-return 7)))
+
+(defx8632lapfunction %subtract-one ((high arg_y) (low arg_z))
+  (mark-as-imm temp0)
+  (unbox-fixnum low imm0)
+  (movl (% high) (% temp0))
+  (shll ($ (- 16 x8632::fixnumshift)) (% temp0))
+  (orl (% imm0) (% temp0))
+  (subl ($ 1) (% temp0))
+  (movzwl (% temp0.w) (% imm0))
+  (box-fixnum imm0 low)
+  (sarl ($ 16) (% temp0))
+  (box-fixnum temp0 high)
+  (mark-as-node temp0)
+  (push (% high))
+  (push (% low))
+  (movl (% esp) (% temp0))
+  (jmp-subprim .SPvalues))
+
+;;; %SUBTRACT-WITH-BORROW -- Internal.
+;;;
+;;; This should be in assembler, and should not cons intermediate results.  It
+;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
+;;; subtracting a possible incoming borrow.
+;;;
+;;; We really do:  a - b - 1 + borrow, where borrow is either 0 or 1.
+;;; 
+
+(defx8632lapfunction %subtract-with-borrow-1 ((a-h 12) (a-l 8) (b-h 4) #|(ra 0)|# (b-l arg_y) (borrow arg_z))
+  (mark-as-imm temp0)
+  (mark-as-imm temp1)
+  (unbox-fixnum b-l temp0)
+  (movl (@ b-h (% esp)) (% imm0))
+  (unbox-fixnum imm0 imm0)
+  (shll ($ 16) (% imm0))
+  (orl (% imm0) (% temp0))
+  (movl (@ a-l (% esp)) (% temp1))
+  (unbox-fixnum temp1 temp1)
+  (movl (@ a-h (% esp)) (% imm0))
+  (unbox-fixnum imm0 imm0)
+  (shll ($ 16) (% imm0))
+  (orl (% imm0) (% temp1))
+  (cmpl ($ '1) (@ borrow (% esp)))	;CF = 1 if borrow is 0 else CF = 0
+  (sbbl (% temp0) (% temp1))
+  (movl ($ 1) (% imm0))
+  (subb ($ 0) (% imm0))
+  (box-fixnum imm0 arg_z)
+  (movzwl (% temp1.w) (% imm0))
+  (box-fixnum imm0 imm0)
+  (push (% imm0))			;high
+  (sarl ($ 16) (% temp1))
+  (box-fixnum temp1 imm0)
+  (mark-as-node temp0)
+  (mark-as-node temp1)
+  (push (% imm0))			;low
+  (push (% arg_z))			;borrow
+  (set-nargs 3)
+  (popl (@ 16 (% esp)))			;relocate return addr
+  (addl ($ '4) (% esp))			;discard args, part of saved frame
+  (movl (% esp) (% temp0))
+  (jmp-subprim .SPvalues))
+  
 
 ;;; To normalize a bignum is to drop "trailing" digits which are
@@ -183,11 +299,10 @@
     (je @adjust-length)
     (movl (% next) (% sign))
-    ;; (bignum-ref bignum (- len 2)), i.e., next-to-last digit
+    ;; (bignum-ref bignum (- len 2))
     (movl (@ (- x8632::misc-data-offset 8) (% bignum) (% len)) (% next))
     @test
     (movl (% next) (% imm0))
     (sarl ($ 31) (% imm0))		;propagate sign bit
-    (xorl (% sign) (% imm0))
-    (testl (% imm0) (% imm0))		;whole digit only sign?
+    (xorl (% sign) (% imm0))		;whole digit only sign?
     (jz @loop)
     ;; New length now in len.
@@ -196,7 +311,9 @@
     (cmpl (% len) (% imm0))
     ;; If the new length is the same as the original length, we know
-    ;; that the bignum is at least two digits long, and will never fit
-    ;; in a fixnum.  Therefore, there's no need to do either of the
-    ;; tests at @maybe-return-fixnum.
+    ;; that the bignum is at least two digits long (because if it was
+    ;; shorter, we would have branched directly to
+    ;; @maybe-return-fixnum), and thus won't fit in a fixnum.
+    ;; Therefore, there's no need to do either of the tests at
+    ;; @maybe-return-fixnum.
     (je @done)
     (movl (% len) (% imm0))
@@ -206,5 +323,4 @@
     @maybe-return-fixnum
     ;; could use SETcc here to avoid one branch
-    (pop (% return-fixnum-p))
     (cmpl ($ x8632::nil-value) (@ 0 (% esp))) ;return-fixnum-p
     (je @done)
@@ -222,2 +338,354 @@
     (mark-as-node temp1)
     (single-value-return)))
+
+;;; Multiply X[I] by the unboxed value of the (non-negative) fixnum Y;
+;;; add the incoming carry from CARRY[0] to the 64-bit product.  Store
+;;; the low word of the 64-bit sum in R[0] and the high word in
+;;; CARRY[0].
+(defx8632lapfunction %multiply-and-add ((r 12) (carry 8) (x 4) #|(ra 0)|# (i arg_y) (y arg_z))
+  (let ((xx mm2)
+	(yy mm3)
+	(cc mm4))
+    (movl (@ x (% esp)) (% imm0))
+    (movd (@ x8632::misc-data-offset (% imm0) (% i)) (% xx))
+    (unbox-fixnum y imm0)
+    (movd (% imm0) (% yy))
+    (pmuludq (% xx) (% yy))		;64 bit product
+    (movl (@ carry (% esp)) (% arg_y))
+    (movd (@ x8632::misc-data-offset (% arg_y)) (% cc))
+    (paddq (% cc) (% yy))		;add in 32 bit carry digit
+    (movl (@ r (% esp)) (% arg_z))
+    (movd (% yy) (@ x8632::misc-data-offset (% arg_z)))
+    (psrlq ($ 32) (% yy))
+    (movd (% yy) (@ x8632::misc-data-offset (% arg_y)))
+    (single-value-return 5)))
+
+;; multiply x[i] by y and add to result starting at digit i
+(defx8632lapfunction %multiply-and-add-harder-loop-2
+    ((x 12) (y 8) (r 4) #|(ra 0)|# (i arg_y) (ylen arg_z))
+  (let ((cc mm2)
+	(xx mm3)
+	(yy mm4)
+	(j imm0))
+    (movl (@ x (% esp)) (% temp0))
+    (movd (@ x8632::misc-data-offset (% temp0) (% i)) (% xx)) ;x[i]
+    (movl (@ y (% esp)) (% temp0))
+    (movl (@ r (% esp)) (% temp1))
+    (pxor (% cc) (% cc))
+    (xorl (% j) (% j))
+    @loop
+    (movd (@ x8632::misc-data-offset (% temp0) (% j)) (% yy)) ;y[j]
+    (pmuludq (% xx) (% yy))
+    (paddq (% cc) (% yy))
+    (movd (% yy) (@ x8632::misc-data-offset (% temp1) (% i))) ;store r[i]
+    (movq (% yy) (% cc))
+    (psrlq ($ 32) (% cc))		;carry high digit
+    (addl ($ '1) (% i))
+    (addl ($ '1) (% j))
+    (subl ($ '1) (% ylen))
+    (jg @loop)
+    (movd (% cc) (@ x8632::misc-data-offset (% temp1) (% i)))
+    (single-value-return 5)))
+
+;; this is silly  
+(defx8632lapfunction %add-the-carry ((high 4) #|(ra 0)|# (low arg_y) (c arg_z))
+  (mark-as-imm temp0)
+  (unbox-fixnum low imm0)
+  (movl (@ high (% esp)) (% temp0))
+  (shll ($ (- 16 x8632::fixnumshift)) (% temp0))
+  (orl (% imm0) (% temp0))
+  (unbox-fixnum c imm0)
+  (addl (% imm0) (% temp0))
+  (movzwl (% temp0.w) (% imm0))
+  (box-fixnum imm0 temp1)
+  (sarl ($ 16) (% temp0))
+  (box-fixnum temp0 temp0)
+  (push (% temp0))			;high
+  (push (% temp1))			;low
+  (set-nargs 2)
+  (mark-as-node temp0)
+  (movl (% esp) (% temp0))
+  (jmp-subprim .SPvalues))
+
+(defx8632lapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
+  (let ((i arg_y)
+	(len temp0)
+	(zeros temp1))
+    (vector-length bignum temp0)
+    (xorl (% i) (% i))
+    (movl ($ '32) (% zeros))
+    @loop
+    (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% imm0))
+    (addl ($ '1) (% i))
+    (addl ($ '32) (% zeros))
+    (testl (% imm0) (% imm0))
+    (jz @loop)
+    ;; now count zero bits in digit
+    (bsrl (% imm0) (% imm0))
+    (box-fixnum imm0 imm0)
+    (addl (% imm0) (% zeros))
+    (movl (% zeros) (% arg_z))
+    (single-value-return)))
+
+;;; dest[i] = (logand x[i] y[i])
+(defx8632lapfunction %bignum-logand ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
+  (let ((i temp0)
+	(xx temp1)
+	(yy arg_y))
+    (movl (@ idx (% esp)) (% i))
+    (movl (@ x (% esp)) (% xx))
+    (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
+    (andl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
+    (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
+    (single-value-return 4)))
+
+;;; dest[i] = (logandc1 x[i] y[i])
+(defx8632lapfunction %bignum-logandc1 ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
+  (let ((i temp0)
+	(xx temp1)
+	(yy arg_y))
+    (movl (@ idx (% esp)) (% i))
+    (movl (@ x (% esp)) (% xx))
+    (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
+    (not (% imm0))
+    (andl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
+    (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
+    (single-value-return 4)))
+
+;;; dest[i] = (logandc2 x[i] y[i])
+(defx8632lapfunction %bignum-logandc2 ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
+  (let ((i temp0)
+	(xx temp1)
+	(yy arg_y))
+    (movl (@ idx (% esp)) (% i))
+    (movl (@ x (% esp)) (% xx))
+    (movl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
+    (not (% imm0))
+    (andl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
+    (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
+    (single-value-return 4)))
+
+;;; dest[i] = (logior x[i] y[i])
+(defx8632lapfunction %bignum-logior ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
+  (let ((i temp0)
+	(xx temp1)
+	(yy arg_y))
+    (movl (@ idx (% esp)) (% i))
+    (movl (@ x (% esp)) (% xx))
+    (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
+    (orl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
+    (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
+    (single-value-return 4)))
+
+;;; dest[i] = (lognot x[i])
+(defx8632lapfunction %bignum-logior ((idx 4) #|(ra 0)|# (x arg_y) (dest arg_z))
+  (let ((i temp0))
+    (movl (@ idx (% esp)) (% i))
+    (movl (@ x8632::misc-data-offset (% x) (% i)) (% imm0))
+    (not (% imm0))
+    (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
+    (single-value-return 3)))
+
+;;; dest[i] = (logxor x[i] y[i])
+(defx8632lapfunction %bignum-logxor ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
+  (let ((i temp0)
+	(xx temp1)
+	(yy arg_y))
+    (movl (@ idx (% esp)) (% i))
+    (movl (@ x (% esp)) (% xx))
+    (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
+    (xorl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
+    (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
+    (single-value-return 4)))
+
+(defx8632lapfunction %compare-digits ((a 4) #|(ra 0)|# (b arg_y) (i arg_z))
+  (movl (@ a (% esp)) (% temp0))
+  (movl (@ x8632::misc-data-offset (% temp0) (% i)) (% imm0))
+  (xorl (% temp0) (% temp0))
+  (movl ($ '1) (% temp1))
+  (movl ($ '-1) (% arg_y))
+  (cmpl (@ x8632::misc-data-offset (% b) (% i)) (% imm0))
+  (cmovzl (% temp0) (% arg_z))
+  (cmovgl (% temp1) (% arg_z))
+  (cmovll (% arg_y) (% arg_z))
+  (single-value-return 3))
+
+;; returns number of bits in digit-hi,digit-lo that are sign bits
+;; 32 - digits-sign-bits is integer-length
+(defx8632lapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
+  (mark-as-imm temp0)
+  (shll ($ (- 16 x8632::fixnumshift)) (% hi))
+  (unbox-fixnum lo imm0)
+  (orl (% hi) (% imm0))
+  (movl (% imm0) (% temp0))
+  (not (% imm0))
+  (testl (% temp0) (% temp0))
+  (js @wasneg)
+  (not (% imm0))
+  @wasneg
+  (bsrl (% imm0) (% imm0))
+  (sete (% temp0.b))
+  (xorl ($ 31) (% imm0))
+  (addb (% temp0.b) (% imm0.b))
+  (box-fixnum imm0 arg_z)
+  (mark-as-node temp0)
+  (single-value-return))
+
+; if dest not nil store unboxed result in dest(0), else return a fixnum
+(defx8632lapfunction fix-digit-logandc2 ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
+  (mark-as-imm temp0)
+  (movl (@ fix (% esp)) (% temp0))
+  (unbox-fixnum temp0 temp0)
+  (movl (@ x8632::misc-data-offset (% big)) (% imm0))
+  (not (% imm0))
+  (andl (% temp0) (% imm0))
+  (mark-as-node temp0)
+  (cmpl ($ x8632::nil-value) (% dest))
+  (jne @store)
+  (box-fixnum imm0 arg_z)
+  (single-value-return 3)
+  @store
+  (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
+  (single-value-return 3))
+
+(defx8632lapfunction digit-lognot-move ((index 4) #|(ra 0)|# (source arg_y) (dest arg_z))
+  (movl (@ index (% esp)) (% temp0))
+  (movl (@ x8632::misc-data-offset (% source) (% temp0)) (% imm0))
+  (not (% imm0))
+  (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% temp0)))
+  (single-value-return 3))
+
+;; Add b to a starting at a[i]
+;; might want not to use SSE2 for this.  use lea to update loop counter
+;; variables so that the flags don't get set.
+(defx8632lapfunction bignum-add-loop-+ ((i 8) (a 4) #|(ra 0)|# (b arg_y) (blen arg_z))
+  (let ((aa mm2)
+	(bb mm3)
+	(cc mm4))
+    (movl (@ a (% esp)) (% temp0))
+    (movl (@ i (% esp)) (% temp1))
+    (xorl (% imm0) (% imm0))
+    (pxor (% cc) (% cc))
+    @loop
+    (movd (@ x8632::misc-data-offset (% temp0) (% temp1)) (% aa))
+    (movd (@ x8632::misc-data-offset (% b) (% imm0)) (% bb))
+    (paddq (% bb) (% aa))
+    (paddq (% cc) (% aa))
+    (movd (% aa) (@ x8632::misc-data-offset (% temp0) (% temp1)))
+    (psrlq ($ 32) (% aa))
+    (movq (% aa) (% cc))
+    (addl ($ '1) (% temp1))
+    (addl ($ '1) (% imm0))
+    (subl ($ '1) (% blen))
+    (jg @loop)
+    ;; add in final carry
+    (movd (% cc) (% imm0))
+    (addl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
+    (single-value-return 4)))
+
+(defx8632lapfunction bignum-logtest-loop ((count 4) #|(ra 0)|# (s1 arg_y) (s2 arg_z))
+  (let ((i temp1)
+	(c temp0))
+    (movl (@ count (% esp)) (% c))
+    (xorl (% i) (% i))
+    @loop
+    (movl (@ x8632::misc-data-offset (% s1) (% i)) (% imm0))
+    (test (@ x8632::misc-data-offset (% s2) (% i)) (% imm0))
+    (jnz @true)
+    (addl ($ '1) (% i))
+    (cmpl (% i) (% c))
+    (jg @loop)
+    (movl ($ x8632::nil-value) (% arg_z))
+    (single-value-return 3)
+    @true
+    (movl ($ x8632::t-value) (% arg_z))
+    (single-value-return 3)))
+
+;;; called from bignum-ashift-left-unaligned
+(defx8632lapfunction bignum-shift-left-loop ((nbits 12) (result 8) (bignum 4) #|(ra 0)|# (res-len-1 arg_y) (j arg_z))
+  (let ((r temp0)
+	(b temp1)
+	(bb mm2)
+	(bits mm3)
+	(tt mm4)
+	(remaining-bits mm5))
+    (movl (% j) (% imm0))
+    (subl ($ '1) (% imm0))
+    (pushl (% imm0))			;digits
+    (movl (@ nbits (% esp)) (% imm0))
+    (unbox-fixnum imm0 imm0)
+    (movd (% imm0) (% bits))		;shift count
+    (negl (% imm0))
+    (addl ($ 32) (% imm0))
+    (movd (% imm0) (% remaining-bits))
+    (movl (@ result (% esp)) (% r))
+    (movl (@ bignum (% esp)) (% b))
+    (xorl (% imm0) (% imm0))
+    @loop
+    ;; bignum[i]
+    (movd (@ x8632::misc-data-offset (% b) (% imm0)) (% bb))
+    (psrlq (% remaining-bits) (% bb))
+    ;; bignum[i+1]
+    (movd (@ (+ x8632::misc-data-offset 4) (% b) (% imm0)) (% tt))
+    (psllq (% bits) (% tt))
+    (por (% tt) (% bb))
+    (movd (% bb) (@ x8632::misc-data-offset (% r) (% j)))
+    (addl ($ '1) (% imm0))
+    (addl ($ '1) (% j))
+    (cmpl (% j) (% res-len-1))
+    (jne @loop)
+    (movd (@ x8632::misc-data-offset (% bignum)) (% bb)) ;bignum[0]
+    (psllq (% bits) (% bb))
+    (popl (% arg_y))			;digits
+    (movd (% bb) (@ x8632::misc-data-offset (% r) (% arg_y)))
+    (movd (@ x8632::misc-data-offset (% bignum) (% imm0)) (% bb)) ;bignum[i]
+    (psrlq (% remaining-bits) (% bb))
+    (movd (% bb) (@ x8632::misc-data-offset (% r) (% j)))
+    (single-value-return 5)))
+
+(defx8632lapfunction %logcount-complement ((bignum arg_y) (i arg_z))
+  (mark-as-imm temp0)
+  (let ((rshift imm0)
+	(temp temp0))
+    (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% rshift))
+    (notl (% rshift))
+    (xorl (% arg_z) (% arg_z))
+    (testl (% rshift) (% rshift))
+    (jmp @test)
+    @next
+    (lea (@ -1 (% rshift)) (% temp))
+    (and (% temp) (% rshift))		;sets flags
+    (lea (@ '1 (% arg_z)) (% arg_z))	;doesn't set flags
+    @test
+    (jne @next)
+    (mark-as-node temp0)
+    (single-value-return)))
+
+(defx8632lapfunction %logcount ((bignum arg_y) (i arg_z))
+  (mark-as-imm temp0)
+  (let ((rshift imm0)
+	(temp temp0))
+    (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% rshift))
+    (xorl (% arg_z) (% arg_z))
+    (testl (% rshift) (% rshift))
+    (jmp @test)
+    @next
+    (lea (@ -1 (% rshift)) (% temp))
+    (and (% temp) (% rshift))		;sets flags
+    (lea (@ '1 (% arg_z)) (% arg_z))	;doesn't set flags
+    @test
+    (jne @next)
+    (mark-as-node temp0)
+    (single-value-return)))
+
+
+;;; floor: given x and y, producing q and r, q * y + r = x.
+
+;;; Divide bignum x by single digit y (passed as two halves).
+;;; The quotient in stored in q, and the remainder is returned
+;;; in two halves.
+(defx8632lapfunction %floor-loop-quo ((x 8) (q 4) #|(ra 0)|# (yhi arg_y) (ylo arg_z))
+  )
+
+
+
Index: /branches/ia32/level-0/X86/X8632/x8632-def.lisp
===================================================================
--- /branches/ia32/level-0/X86/X8632/x8632-def.lisp	(revision 7962)
+++ /branches/ia32/level-0/X86/X8632/x8632-def.lisp	(revision 7963)
@@ -17,11 +17,11 @@
 (in-package "CCL")
 
-(defx86lapfunction %function-vector-to-function ((arg arg_z))
-  (single-value-return))
-
-(defx86lapfunction %function-to-function-vector  ((arg arg_z))
-  (single-value-return))
-
-(defx86lapfunction %function-code-words ((fun arg_z))
+(defx8632lapfunction %function-vector-to-function ((arg arg_z))
+  (single-value-return))
+
+(defx8632lapfunction %function-to-function-vector  ((arg arg_z))
+  (single-value-return))
+
+(defx8632lapfunction %function-code-words ((fun arg_z))
   (trap-unless-typecode= fun x8632::subtag-function)
   (movzwl (@ x8632::misc-data-offset (% fun)) (% imm0))
@@ -29,5 +29,5 @@
   (single-value-return))
 
-(defx86lapfunction %nth-immediate ((fun arg_y) (n arg_z))
+(defx8632lapfunction %nth-immediate ((fun arg_y) (n arg_z))
   (trap-unless-typecode= fun x8632::subtag-function)
   (movzwl (@ x8632::misc-data-offset (% fun)) (% imm0))
@@ -36,13 +36,15 @@
   (single-value-return))
 
-(defx86lapfunction %set-nth-immediate ((fun 4) #|(ra 0)|# (n arg_y) (new arg_z))
-  (movl (@ fun (% esp)) (% temp0))
+(defx8632lapfunction %set-nth-immediate ((fun 4) #|(ra 0)|# (n arg_y) (new arg_z))
+  (popl (@ 8 (% esp)))
+  (popl (% temp0))
+  (addl ($ 4) (% esp))
   (trap-unless-typecode= temp0 x8632::subtag-function)
-  (movl (@ x8632::misc-data-offset (% temp0)) (% imm0))
+  (movzwl (@ x8632::misc-data-offset (% temp0)) (% imm0))
   (lea (@ (% n) (% imm0) 4) (% arg_y))
   ;; expects gvector in temp0
   (jmp-subprim .SPgvset))
 
-(defx86lapfunction %function-code-byte ((fun arg_y) (pc arg_z))
+(defx8632lapfunction %function-code-byte ((fun arg_y) (pc arg_z))
   (unbox-fixnum pc imm0)
   (movzbl (@ (% fun) (% imm0)) (% imm0))
@@ -50,179 +52,146 @@
   (single-value-return))
 
-;;; ----
-
-
-;;; Returns 3 values: mask of registers used in the function, stack location
-;;; from which they'd be restored, relative PC at which they're saved. If
-;;; the mask is 0, the values NIL NIL NIL are returned. If either the stack
-;;; location or relative PC is #xff, both of those values will be returned
-;;; as NIL.
-(defx86lapfunction %function-register-usage ((f arg_z))
-  (check-nargs 1)
-  (trap-unless-fulltag= f x8664::fulltag-function)
-  (movzbl (@ -1 (% f)) (% imm0.l))
-  (shll ($ 8) (% imm0.l))
-  (box-fixnum imm0 arg_x)
-  (movq (% rsp) (% temp0))
-  (set-nargs 3)
-  (je @no-regs)
-  (movzbl (@ -2 (% f)) (% imm0.l))
-  (movzbl (@ -3 (% f)) (% imm1.l))
-  (cmpb ($ #xff) (% imm0.b))
-  (je @unencodable)
-  (cmpb ($ #xff) (% imm1.b))
-  (je @unencodable)
+;;; Use the offsets in the self-reference table to replace the :self
+;;; in (movl ($ :self) (% fn)) with the function's actual address.
+(defx8632lapfunction %make-code-executable ((f arg_z))
+  (movzwl (@ x8632::misc-data-offset (% f)) (% imm0))
   (box-fixnum imm0 arg_y)
-  (box-fixnum imm1 arg_z)
-  (push (% arg_x))
-  (push (% arg_y))
-  (push (% arg_z))
-  (jmp-subprim .SPvalues)
-  @unencodable
-  (push (% arg_x))
-  (pushq ($ nil))
-  (pushq ($ nil))
-  (jmp-subprim .SPvalues)
-  @no-regs
-  (pushq ($ nil))
-  (pushq ($ nil))
-  (pushq ($ nil))
-  (jmp-subprim .SPvalues))
-  
-        
-
-(defx86lapfunction %make-code-executable ((codev arg_z))
-  (single-value-return))
-
-         
-
-(defx86lapfunction %get-kernel-global-from-offset ((offset arg_z))
+  (jmp @test)
+  @loop
+  (movl (% f) (@ (% f) (% imm0)))
+  (subl ($ '1) (% arg_y))
+  @test
+  (movl (@ -4 (% f) (% arg_y)) (% imm0))
+  (testl (% imm0) (% imm0))
+  (jnz @loop)
+  (single-value-return))
+
+(defx8632lapfunction %get-kernel-global-from-offset ((offset arg_z))
   (check-nargs 1)
   (unbox-fixnum offset imm0)
-  (movq (@ target::nil-value (% imm0)) (% arg_z))
-  (single-value-return))
-
-(defx86lapfunction %set-kernel-global-from-offset ((offset arg_y) (new-value arg_z))
+  (movl (@ x8632::nil-value (% imm0)) (% arg_z))
+  (single-value-return))
+
+(defx8632lapfunction %set-kernel-global-from-offset ((offset arg_y)
+						     (new-value arg_z))
   (check-nargs 2)
   (unbox-fixnum offset imm0)
-  (movq (% arg_z) (@ target::nil-value (% imm0)))
-  (single-value-return))
-
-
-(defx86lapfunction %get-kernel-global-ptr-from-offset ((offset arg_y)
-						       (ptr arg_z))
+  (movl (% arg_z) (@ x8632::nil-value (% imm0)))
+  (single-value-return))
+
+(defx8632lapfunction %get-kernel-global-ptr-from-offset ((offset arg_y)
+							 (ptr arg_z))
   (check-nargs 2)
   (unbox-fixnum offset imm0)
-  (movq (@ target::nil-value (% imm0)) (% imm0))
-  (movq (% imm0) (@ x8664::macptr.address (% ptr)))
-  (single-value-return))
-
-
-
-
-(defx86lapfunction %fixnum-ref ((fixnum arg_y) #| &optional |# (offset arg_z))
+  (movl (@ x8632::nil-value (% imm0)) (% imm0))
+  (movl (% imm0) (@ x8632::macptr.address (% ptr)))
+  (single-value-return))
+
+(defx8632lapfunction %fixnum-ref ((fixnum arg_y) #| &optional |# (offset arg_z))
   (:arglist (fixnum &optional offset))
   (check-nargs 1 2)
-  (cmpw ($ x8664::fixnumone) (% nargs))
+  (cmpw ($ x8632::fixnumone) (% nargs))
   (jne @2-args)
-  (movq (% offset) (% fixnum))
+  (movl (% offset) (% fixnum))
   (xorl (%l offset) (%l offset))
   @2-args
   (unbox-fixnum offset imm0)
-  (movq (@ (% fixnum) (% imm0)) (% arg_z))
-  (single-value-return))
-
-(defx86lapfunction %fixnum-ref-natural ((fixnum arg_y) #| &optional |# (offset arg_z))
+  (movl (@ (% fixnum) (% imm0)) (% arg_z))
+  (single-value-return))
+
+(defx8632lapfunction %fixnum-ref-natural ((fixnum arg_y) #| &optional |# (offset arg_z))
   (:arglist (fixnum &optional offset))
   (check-nargs 1 2)
-  (cmpw ($ x8664::fixnumone) (% nargs))
+  (cmpw ($ x8632::fixnumone) (% nargs))
   (jne @2-args)
-  (movq (% offset) (% fixnum))
+  (movl (% offset) (% fixnum))
   (xorl (%l offset) (%l offset))
   @2-args
   (unbox-fixnum offset imm0)
-  (movq (@ (% fixnum) (% imm0)) (% imm0))
-  (jmp-subprim .SPmakeu64))
-
-(defx86lapfunction %fixnum-set ((fixnum arg_x) (offset arg_y) #| &optional |# (new-value arg_z))
+  (movl (@ (% fixnum) (% imm0)) (% imm0))
+  (jmp-subprim .SPmakeu32))
+
+(defx8632lapfunction %fixnum-set ((fixnum 4) #|(ra 0)|# (offset arg_y) #| &optional |# (new-value arg_z))
   (:arglist (fixnum offset &optional newval))
   (check-nargs 2 3)
+  (movl (@ fixnum (% esp)) (% temp0))
   (cmpw ($ '2) (% nargs))
   (jne @3-args)
-  (movq (% offset) (% fixnum))
+  (movl (% offset) (% temp0))
   (xorl (%l offset) (%l offset))
   @3-args
   (unbox-fixnum offset imm0)
-  (movq (% new-value) (@ (% fixnum) (% imm0)))
-  (movq (% new-value) (% arg_z))
-  (single-value-return))
-
-
-(defx86lapfunction %fixnum-set-natural ((fixnum arg_x) (offset arg_y) #| &optional |# (new-value arg_z))
+  (movl (% new-value) (@ (% temp0) (% imm0)))
+  (movl (% new-value) (% arg_z))
+  (single-value-return 3))
+
+
+(defx8632lapfunction %fixnum-set-natural ((fixnum 4) #|(ra 0)|# (offset arg_y) #| &optional |# (new-value arg_z))
   (:arglist (fixnum offset &optional newval))
   (check-nargs 2 3)
+  (movl (@ fixnum (% esp)) (% temp0))
   (save-simple-frame)
   (cmpw ($ '2) (% nargs))
   (jne @3-args)
-  (movq (% offset) (% fixnum))
+  (movl (% offset) (% temp0))
   (xorl (%l offset) (%l offset))
   @3-args
-  (call-subprim .SPgetu64)
-  (unbox-fixnum offset imm1)
-  (movq (% imm0) (@ (% fixnum) (% imm1)))
+  (call-subprim .SPgetu32)		;puts u32 in imm0
+  (mark-as-imm temp1)
+  (unbox-fixnum offset temp1)
+  (movl (% imm0) (@ (% temp0) (% temp1)))
+  (mark-as-node temp1)
   (restore-simple-frame)
   (single-value-return))
 
 
-(defx86lapfunction %current-frame-ptr ()
+(defx8632lapfunction %current-frame-ptr ()
   (check-nargs 0)
-  (movq (% rbp) (% arg_z))
-  (single-value-return))
-
-
-(defx86lapfunction %current-tsp ()
+  (movl (% ebp) (% arg_z))
+  (single-value-return))
+
+
+(defx8632lapfunction %current-tsp ()
   (check-nargs 0)
-  (movq (@ (% :rcontext) x8664::tcr.save-tsp) (% arg_z))
-  (single-value-return))
-
-
-(defx86lapfunction %%frame-backlink ((p arg_z))
+  (movl (@ (% :rcontext) x8632::tcr.save-tsp) (% arg_z))
+  (single-value-return))
+
+
+(defx8632lapfunction %%frame-backlink ((p arg_z))
   (check-nargs 1)
-  (movq (@ (% arg_z)) (% arg_z))
-  (single-value-return))
-
-;;; Look for "lea -nnnn(%rip),%fn" AT the tra; if that's present, use
-;;; the dispacement -nnnn to find the function.  The end of the
-;;; encoded displacement is
-;;; x8664::recover-fn-from-rip-disp-offset (= 7) bytes from the tra.
-(defx86lapfunction %return-address-function ((r arg_z))
+  (movl (@ (% arg_z)) (% arg_z))
+  (single-value-return))
+
+;;; Look for "movl $imm32,%fn at the tra;  if present, then $imm32 is
+;;; the address of the function.
+;;;
+;;; That is: #b10111111 <imm32>
+;;;                ^^^^
+;;;   operand size || register number (%fn/%edi)
+
+(defx8632lapfunction %return-address-function ((r arg_z))
   (extract-lisptag r imm0)
-  (cmpb ($ x8664::tag-tra) (% imm0.b))
+  (cmpb ($ x8632::fulltag-tra) (% imm0.b))
   (jne @fail)
-  (cmpw ($ x8664::recover-fn-from-rip-word0) (@ (% r)))
+  (cmpb ($ x8632::recover-fn-opcode-byte) (@ (% r)))
   (jne @fail)
-  (cmpb ($ x8664::recover-fn-from-rip-byte2) (@ 2 (% r)))
-  (movslq (@ x8664::recover-fn-from-rip-disp-offset (% r)) (% imm0))
-  (jne @fail)
-  (lea (@ x8664::recover-fn-from-rip-length (% imm0) (% r)) (% arg_z))
+  (movl (@ x8632::recover-fn-address-offset (% r)) (% arg_z))
   (single-value-return)
   @fail
-  (movl ($ x8664::nil-value) (% arg_z.l))
-  (single-value-return))
-
-(defx86lapfunction %return-address-offset ((r arg_z))
+  (movl ($ x8632::nil-value) (% arg_z))
+  (single-value-return))
+
+;;; xxx this may need twiddling to refer to the right place
+(defx8632lapfunction %return-address-offset ((r arg_z))
   (extract-lisptag r imm0)
-  (cmpb ($ x8664::tag-tra) (% imm0.b))
+  (cmpb ($ x8632::fulltag-tra) (% imm0.b))
   (jne @fail)
-  (cmpw ($ x8664::recover-fn-from-rip-word0) (@ (% r)))
+  (cmpb ($ x8632::recover-fn-opcode-byte) (@ (% r)))
   (jne @fail)
-  (cmpb ($ x8664::recover-fn-from-rip-byte2) (@ 2 (% r)))
-  (movslq (@ x8664::recover-fn-from-rip-disp-offset (% r)) (% imm0))
-  (jne @fail)
-  (negq (% imm0))
-  (leaq (@ (- (ash x8664::recover-fn-from-rip-length x8664::fixnumshift)) (% imm0) 8) (% arg_z))
-  (single-value-return)
+  (movl (@ x8632::recover-fn-address-offset (% r)) (% imm0))
+  (subl (% arg_z) (% imm0))		;offset = tra - fn
+  (box-fixnum imm0 arg_z)
   @fail
-  (movl ($ x8664::nil-value) (% arg_z.l))
+  (movl ($ x8632::nil-value) (% arg_z))
   (single-value-return))
 
@@ -230,148 +199,159 @@
 ;;; frame pointer is the caller of the function that "uses" that frame.
 (defun %cfp-lfun (p)
-  (let* ((ra (%fixnum-ref p x8664::lisp-frame.return-address)))
+  (let* ((ra (%fixnum-ref p x8632::lisp-frame.return-address)))
     (if (eq ra (%get-kernel-global ret1valaddr))
-      (setq ra (%fixnum-ref p x8664::lisp-frame.xtra)))
+      (setq ra (%fixnum-ref p x8632::lisp-frame.xtra)))
     (values (%return-address-function ra) (%return-address-offset ra))))
 
-
-
-(defx86lapfunction %uvector-data-fixnum ((uv arg_z))
+(defx8632lapfunction %uvector-data-fixnum ((uv arg_z))
   (check-nargs 1)
-  (trap-unless-fulltag= arg_z x8664::fulltag-misc)
-  (addq ($ x8664::misc-data-offset) (% arg_z))
-  (single-value-return))
-
-(defx86lapfunction %catch-top ((tcr arg_z))
+  (trap-unless-fulltag= arg_z x8632::fulltag-misc)
+  (addl ($ x8632::misc-data-offset) (% arg_z))
+  (single-value-return))
+
+(defx8632lapfunction %catch-top ((tcr arg_z))
   (check-nargs 1)
-  (movl ($ x8664::nil-value) (%l arg_y))
-  (movq (@ (% :rcontext) x8664::tcr.catch-top) (% arg_z))
+  (movl ($ x8632::nil-value) (% arg_y))
+  (movl (@ (% :rcontext) x8632::tcr.catch-top) (% arg_z))
   (testb (%b arg_z) (%b arg_z))
-  (cmoveq (% arg_y) (% arg_z))
-  (single-value-return))
-
-(defx86lapfunction %catch-tsp ((catch arg_z))
+  (cmovel (% arg_y) (% arg_z))
+  (single-value-return))
+
+(defx8632lapfunction %catch-tsp ((catch arg_z))
   (check-nargs 1)
-  (lea (@  (- (+ target::fulltag-misc
-                                 (ash 1 (1+ target::word-shift)))) (% arg_z))
+  (lea (@  (- (+ x8632::fulltag-misc
+		 (ash 1 (1+ x8632::word-shift)))) (% arg_z))
        (% arg_z))
   (single-value-return))
-
-
 
 ;;; Same as %address-of, but doesn't cons any bignums
 ;;; It also left shift fixnums just like everything else.
-(defx86lapfunction %fixnum-address-of ((x arg_z))
+(defx8632lapfunction %fixnum-address-of ((x arg_z))
   (check-nargs 1)
   (box-fixnum x arg_z)
   (single-value-return))
 
-(defx86lapfunction %save-standard-binding-list ((bindings arg_z))
-  (movq (@ (% :rcontext) x8664::tcr.vs-area) (% imm0))
-  (movq (@ x8664::area.high (% imm0)) (% imm1))
-  (subq ($ x8664::node-size) (% imm1))
-  (movq (% bindings) (@ (% imm1)))
-  (single-value-return))
-
-(defx86lapfunction %saved-bindings-address ()
-  (movq (@ (% :rcontext) x8664::tcr.vs-area) (% imm0))
-  (movq (@ x8664::area.high (% imm0)) (% imm1))
-  (lea (@ (- x8664::node-size) (% imm1)) (% arg_z))
-  (single-value-return))
-
-(defx86lapfunction %get-object ((macptr arg_y) (offset arg_z))
+(defx8632lapfunction %save-standard-binding-list ((bindings arg_z))
+  (mark-as-imm temp0)
+  (movl (@ (% :rcontext) x8632::tcr.vs-area) (% imm0))
+  (movl (@ x8632::area.high (% imm0)) (% temp0))
+  (subl ($ x8632::node-size) (% temp0))
+  (movl (% bindings) (@ (% temp0)))
+  (mark-as-node temp0)
+  (single-value-return))
+
+(defx8632lapfunction %saved-bindings-address ()
+  (mark-as-imm temp0)
+  (movl (@ (% :rcontext) x8632::tcr.vs-area) (% imm0))
+  (movl (@ x8632::area.high (% imm0)) (% temp0))
+  (leal (@ (- x8632::node-size) (% temp0)) (% arg_z))
+  (mark-as-node temp0)
+  (single-value-return))
+
+(defx8632lapfunction %get-object ((macptr arg_y) (offset arg_z))
   (check-nargs 2)
-  (trap-unless-typecode= macptr x8664::subtag-macptr)
+  (trap-unless-typecode= macptr x8632::subtag-macptr)
+  (trap-unless-lisptag= offset x8632::tag-fixnum)
   (macptr-ptr macptr imm0)
-  (trap-unless-lisptag= offset target::tag-fixnum imm1)
-  (unbox-fixnum offset imm1)
-  (movq (@ (% imm0) (% imm1)) (% arg_z))
-  (single-value-return))
-
-
-(defx86lapfunction %set-object ((macptr arg_x) (offset arg_y) (value arg_z))
+  (mark-as-imm temp0)
+  (unbox-fixnum offset temp0)
+  (movl (@ (% imm0) (% temp0)) (% arg_z))
+  (mark-as-node temp0)
+  (single-value-return))
+
+(defx8632lapfunction %set-object ((macptr 4) #|(ra 0)|# (offset arg_y) (value arg_z))
   (check-nargs 3)
-  (trap-unless-typecode= macptr target::subtag-macptr)
-  (macptr-ptr macptr imm0)
-  (trap-unless-lisptag= offset target::tag-fixnum imm1)
-  (unbox-fixnum offset imm1)
-  (movq (% arg_z) (@ (% imm0) (% imm1)))
-  (single-value-return))
-
-(defx86lapfunction %apply-lexpr-with-method-context ((magic arg_x)
-                                                     (function arg_y)
-                                                     (args arg_z))
+  (movl (@ macptr (% esp)) (% temp1))
+  (trap-unless-typecode= temp1 x8632::subtag-macptr)
+  (trap-unless-lisptag= offset x8632::tag-fixnum)
+  (macptr-ptr temp1 imm0)
+  (mark-as-imm temp0)
+  (unbox-fixnum offset temp0)
+  (movl (% arg_z) (@ (% imm0) (% temp0)))
+  (mark-as-node temp0)
+  (single-value-return))
+
+(defx8632lapfunction %apply-lexpr-with-method-context ((magic 4)
+						       #|(ra 0)|#
+						       (function arg_y)
+						       (args arg_z))
   ;; Somebody's called (or tail-called) us.
-  ;; Put magic arg in x8664::next-method-context (= x8664::temp0).
-  ;; Put function in x8664::xfn until we're ready to jump to it.
-  ;; Set nargs to 0, then spread "args" on stack (clobbers arg_x, arg_y, arg_z,
-  ;;   but preserves x866::xfn/x8664::next-method-context.
-  ;; Jump to the function in x8664::xfn.
-  (popq (% ra0))
-  (movq (% magic) (% next-method-context))
-  (movq (% function) (% xfn))
-  (set-nargs 0)
-  (movq (@ (% args)) (% imm0))          ;lexpr-count
-  (movw (% imm0.w) (% nargs))
-  (leaq (@ x8664::node-size (% arg_z) (% imm0)) (% imm1))
-  (subw ($ '3) (% imm0.w))
+  ;; * Put magic arg in x8632::next-method-context (= x8632::temp0).
+  ;; * Put function in x8632::xfn ( = x8632::temp1) until we're ready to
+  ;;   jump to it.
+  ;; * Set nargs to 0, then spread "args" on stack (clobbers arg_y,
+  ;;   arg_z, but preserves x8632::xfn/x8632::next-method-context. Note
+  ;;   that nargs and imm0 are the same register on x8632.
+  ;; * Jump to the function in x8632::xfn.
+  ;; These steps are done in a mixed up order on x8632 because of
+  ;; the small number of registers.
+  (popl (@ (% :rcontext) x8632::tcr.save0))	;save return address
+  (popl (@ (% :rcontext) x8632::tcr.save1))	; and magic arg in the spill area
+  (movl (% function) (% xfn))		;aka temp1
+  (movl (@ (% args)) (% imm0))		;lexpr-count
+  (movd (% imm0) (% mm0))		;save nargs
+  (mark-as-imm temp0)
+  (leal (@ x8632::node-size (% arg_z) (% imm0)) (% temp0))
+  (subl ($ '2) (% imm0))
   (jbe @reg-only)
-  ;; Some args will be pushed; reserve a frame
-  (pushq ($ x8664::reserved-frame-marker))
-  (pushq ($ x8664::reserved-frame-marker))
+  ;; Some args will be pushed; reserve a frame.
+  (pushl ($ x8632::reserved-frame-marker))
+  (pushl ($ x8632::reserved-frame-marker))
   @pushloop
-  (pushq (@ (- x8664::node-size) (% imm1)))
-  (subq ($ x8664::node-size) (% imm1))
-  (subq ($ x8664::node-size) (% imm0))
+  (pushl (@ (- x8632::node-size) (% temp0)))
+  (subl ($ x8632::node-size) (% temp0))
+  (subl ($ x8632::node-size) (% imm0))
   (jne @pushloop)
-  @three
-  (movq (@ (* x8664::node-size 3) (% arg_z)) (% arg_x))
   @two
-  (movq (@ (* x8664::node-size 2) (% arg_z)) (% arg_y))
+  (movl (@ (* x8632::node-size 2) (% arg_z)) (% arg_y))
   @one
-  (movq (@ (* x8664::node-size 1) (% arg_z)) (% arg_z))
+  (movl (@ (* x8632::node-size 1) (% arg_z)) (% arg_z))
   (jmp @go)
   @reg-only
-  (testw (% nargs) (% nargs))
-  (je @go)
-  (rcmpw (% nargs) ($ '2))
-  (je @two)
-  (jb @one)
-  (jmp @three)
+  (movd (% mm0) (% imm0))		;note that imm0 is nargs
+  (rcmp (% nargs) ($ '1))
+  (je @one)
+  (jb @go)
+  (jmp @two)
   @go
-  (push (% ra0))
+  (mark-as-node temp0)
+  (movl (@ (% :rcontext) x8632::tcr.save1) (% next-method-context)) ;aka temp0
+  (pushl (@ (% :rcontext) x8632::tcr.save0))	 ;return address
+  (movapd (% fpzero) (@ (% :rcontext) x8632::tcr.save0)) ;clear out spill area
+  ;; magic arg in next-method-context: check
+  ;; function in xfn: check
+  ;; nargs/imm0 set: check
   (jmp (% xfn)))
 
-(defx86lapfunction %apply-with-method-context ((magic arg_x)
-                                               (function arg_y)
-                                               (args arg_z))
-  ;; Somebody's called (or tail-called) us.
-  ;; Put magic arg in x8664::next-method-context (= x8664::temp0).
-  ;; Put function in x8664::xfn (= x8664::temp1).
-  ;; Set nargs to 0, then spread "args" on stack (clobbers arg_x, arg_y, arg_z,
-  ;;   but preserves x8664::xfn/x8664::next-method-context.
-  ;; Jump to the function in x8664::xfn.
-  (pop (% ra0))  
-  (movq (% magic) (% x8664::next-method-context))
-  (movq (% function) (% x8664::xfn))
-  (movq (% args) (% arg_y))             ; in case of error
-  (set-nargs 0)
-  (xorl (% imm0.l) (% imm0.l))
-  (push (% imm0))                       ; reserve frame (might discard
-  (push (% imm0))                       ; it if nothing is passed on stack.)
+(defx8632lapfunction %apply-with-method-context ((magic 4)
+						 #|(ra 0)|#
+						 (function arg_y)
+						 (args arg_z))
+  ;; Similar to above.
+  (popl (@ (% :rcontext) x8632::tcr.save0))	;save return address
+  (popl (@ (% :rcontext) x8632::tcr.save1))	; and magic arg in the spill area
+  (movl (% args) (@ (% :rcontext) x8632::tcr.save2))	;in case of error
+  (xorl (% imm0) (% imm0))
+  (push (% imm0))		;reserve frame (might discard it
+  (push (% imm0))		;if nothing is passed on stack)
   (cmp-reg-to-nil arg_z)
   (je @done)
+  (mark-as-imm temp0)
   @loop
-  (extract-fulltag arg_z imm1)
-  (cmpb ($ x8664::fulltag-cons) (%b imm1))
-  (jne @bad)
-  (%car arg_z arg_x)
+  (extract-fulltag arg_z temp0)
+  (cmpb ($ x8632::fulltag-cons) (% temp0.b)) ;nil is a cons on x8632, but we
+  (jne @bad)				     ; checked for it already.
+  (%car arg_z temp1)
   (%cdr arg_z arg_z)
-  (lea (@ x8664::node-size (% imm0)) (% imm0))
+  (add ($ '1) (% imm0))			;shorter than lea (imm0 is eax)
   (cmp-reg-to-nil arg_z)
-  (push (% arg_x))
+  (push (% temp1))
   (jne @loop)
+  (mark-as-node temp0)
   @done
-  (addw (% imm0.w) (% nargs))
+  ;; arg_y about to get clobbered; put function into xfn.
+  (movl (% function) (% xfn))		;aka temp1
+  ;; imm0 (aka nargs) contains number of args just pushed
+  (test (% imm0) (% imm0))
   (jne @pop)
   @discard-and-go
@@ -379,23 +359,23 @@
   (jmp @go)
   @pop
-  (cmpw ($ '1) (% nargs))
+  (cmpl ($ '1) (% nargs))
   (pop (% arg_z))
   (je @discard-and-go)
-  (cmpw ($ '2) (% nargs))
+  (cmpl ($ '2) (% nargs))
   (pop (% arg_y))
   (je @discard-and-go)
-  (cmpw ($ '3) (% nargs))
-  (pop (% arg_x))
-  (je @discard-and-go)
   @go
-  (push (% ra0))
-  (jmp (% xfn))
+  (pushl (@ (% :rcontext) x8632::tcr.save0))	 ;return address
+  (movl (@ (% :rcontext) x8632::tcr.save1) (% next-method-context)) ;aka temp0
+  (movapd (% fpzero) (@ (% :rcontext) x8632::tcr.save0)) ;clear out spill area
+  (jmp (% xfn))				 ;aka temp1
   @bad
-  (addq (% imm0) (% rsp))
-  (movq (% arg_y) (% arg_z))
-  (movq ($ (ash $XNOSPREAD x8664::fixnumshift)) (% arg_y))
+  (mark-as-node temp0)
+  (addl (% imm0) (% esp))
+  (movl (@ (% :rcontext) x8632::tcr.save1) (% arg_z)) ;saved args
+  (movapd (% fpzero) (@ (% :rcontext) x8632::tcr.save0)) ;clear out spill area
+  (movl ($ '#.$XNOSPREAD) (% arg_y))
   (set-nargs 2)
   (jmp-subprim .SPksignalerr))
-
 
 ;;; The idea here is to call METHOD in the same stack frame in
@@ -404,30 +384,23 @@
 ;;; must have been tail-called, and the frame built on lexpr
 ;;; entry must be in %rbp.
-(defx86lapfunction %apply-lexpr-tail-wise ((method arg_y) (args arg_z))
-  (addq ($ x8664::node-size) (% rsp))   ; discard extra return address
-  (movq (% method) (% xfn))
-  (movq (% args) (% rsp))
-  (pop (%q nargs))
-  (movq (@ x8664::lisp-frame.return-address (% rbp)) (% ra0))
-  (movq (@ 0 (% rbp)) (% rbp))
-  (rcmpw (% nargs) ($ '3))
+(defx8632lapfunction %apply-lexpr-tail-wise ((method arg_y) (args arg_z))
+  (addl ($ x8632::node-size) (% esp))   ; discard extra return address
+  (movl (% method) (% xfn))		;temp1
+  (movl (% args) (% esp))
+  (popl (% imm0))			;nargs
+  (movl (@ x8632::lisp-frame.return-address (% ebp)) (% temp0))
+  (movl (@ 0 (% ebp)) (% ebp))
+  (rcmpw (% nargs) ($ '2))
   (jbe @pop-regs)
-  ;; More than 3 args; some must have been pushed by caller,
+  ;; More than 2 args; some must have been pushed by caller,
   ;; so retain the reserved frame.
   (pop (% arg_z))
   (pop (% arg_y))
-  (pop (% arg_x))
   (jmp @popped)
   @pop-regs
-  (je @pop3)
   (rcmpw (% nargs) ($ '1))
   (jb @discard)
   (ja @pop2)
   (pop (% arg_z))
-  (jmp @discard)
-  @pop3
-  (pop (% arg_z))
-  (pop (% arg_y))
-  (pop (% arg_x))
   (jmp @discard)
   @pop2
@@ -437,9 +410,13 @@
   (discard-reserved-frame)
   @popped
-  (push (% ra0))
+  (push (% temp0))			;return address
   (jmp (% xfn)))
 
-
-
+(defun closure-function (fun)
+  (while (and (functionp fun) (not (compiled-function-p fun)))
+    (setq fun (%nth-immediate fun 0))
+    (when (vectorp fun)
+      (setq fun (svref fun 0))))
+  fun)
 
 ;;; For use by (setf (apply ...) ...)
@@ -450,48 +427,44 @@
    (:arglist (function arg1 arg2 &rest other-args))
    (check-nargs 3 nil)
-   (cmpw ($ '3) (% nargs))
-   (pop (% ra0))
-   (ja @no-frame)
-   (pushq ($ x8664::reserved-frame-marker))
-   (pushq ($ x8664::reserved-frame-marker))
-@no-frame         
-   (push (% arg_x))
-   (movq (% arg_z) (% temp0))           ; last
-   (movq (% arg_y) (% arg_z))           ; butlast
+   (popl (@ (% :rcontext) x8632::tcr.save0))	;save return address
+   ;; only two arg regs on x8632, so the caller will always push a frame
+   (movl (% arg_z) (% temp0))           ; last
+   (movl (% arg_y) (% arg_z))           ; butlast
    (subw ($ '2) (% nargs))              ; remove count for butlast & last
+   (movd (% imm0) (% mm0))		;save nargs (aka imm0) for later
    ;; Do .SPspreadargz inline here
    (xorl (%l imm0) (%l imm0))
-   (movq (% arg_z) (% arg_y))           ; save in case of error
+   (movl (% arg_z) (@ (% :rcontext) x8632::tcr.save1)) ; save in case of error
    (cmp-reg-to-nil arg_z)
    (je @done)
+   (mark-as-imm temp1)
    @loop
-   (extract-fulltag arg_z imm1)
-   (cmpb ($ x8664::fulltag-cons) (%b imm1))
+   (extract-fulltag arg_z temp1)
+   (cmpb ($ x8664::fulltag-cons) (%b temp1))
    (jne @bad)
-   (%car arg_z arg_x)
+   (%car arg_z arg_y)
    (%cdr arg_z arg_z)
    (addl ($ '1) (%l imm0))
    (cmp-reg-to-nil arg_z)   
-   (push (% arg_x))
+   (push (% arg_y))
    (jne @loop)
+   (mark-as-node temp1)
    @done
    ;; nargs was at least 1 when we started spreading, and can't have gotten
    ;; any smaller. 
-   (addw (%w imm0) (% nargs))
-   (movq (% temp0) (% arg_z))
+   (movd (% mm0) (% arg_y))		;nargs from before loop
+   (addl (% arg_y) (% imm0))		;did I mention nargs is imm0?
+   (movl (% temp0) (% arg_z))
    (pop (% arg_y))
-   (pop (% arg_x))
    (addw ($ '1) (% nargs))
-   (cmpw ($ '3) (% nargs))
-   (jne @no-discard)
-   (discard-reserved-frame)
-   @no-discard
    (load-constant funcall temp0)
-   (push (% ra0))
+   (pushl (@ (% :rcontext) x8632::tcr.save0))	;return address
+   (movapd (% fpzero) (@ (% :rcontext) x8632::tcr.save0)) ;clear out spill area
    (jmp-subprim .SPfuncall)
    @bad                                 ; error spreading list.
-   (add (% imm0) (% rsp))               ; discard whatever's been pushed
-   (movq (% arg_y) (% arg_z))
-   (movl ($ '#.$XNOSPREAD) (%l arg_y))
+   (mark-as-node temp1)
+   (add (% imm0) (% esp))               ; discard whatever's been pushed
+   (movl (@ (% :rcontext) x8632::tcr.save1) (% arg_z))
+   (movl ($ '#.$XNOSPREAD) (% arg_y))
    (set-nargs 2)
    (jmp-subprim .SPksignalerr) ))
@@ -505,4 +478,5 @@
 ;;; (c) re-establish the same foreign stack frame and store the result regs
 ;;;     (%rax/%xmm0) there
+#+notyet
 (defx86lapfunction %do-ff-call ((nfp 0) (frame arg_x) (fp-regs arg_y) (entry arg_z))
   (popq (% ra0))
@@ -531,5 +505,5 @@
   (single-value-return))
   
-
+#+notyet
 (defun %ff-call (entry &rest specs-and-vals)
   (declare (dynamic-extent specs-and-vals))
