Index: /branches/ia32/level-0/X86/X8632/x8632-bignum.lisp
===================================================================
--- /branches/ia32/level-0/X86/X8632/x8632-bignum.lisp	(revision 9551)
+++ /branches/ia32/level-0/X86/X8632/x8632-bignum.lisp	(revision 9552)
@@ -654,42 +654,57 @@
     (single-value-return 3)))
 
+;;; shift bignum left by nbits bits (1 <= nbits < 32)
+;;; j is one more than the number of digits in bignum
 (defx8632lapfunction bignum-shift-left-loop ((nbits 12) (result 8)
 					     (bignum 4) #|(ra 0)|#
 					     (res-len-1 arg_y) (j arg_z))
-  (movl (@ nbits (% esp)) (% imm0))
+  (movl (% ebp) (@ 16 (% esp)))
+  (leal (@ 16 (% esp)) (% ebp))
+  (popl (@ 4 (% ebp)))
+  (push (% arg_y))			;ebp - 16
+  (push (% arg_z))			;ebp - 20
+
+  (movl (@ -4 (% ebp)) (% imm0))
   (sarl ($ x8632::fixnumshift) (% imm0))
   (movd (% imm0) (% mm7))		;shift count
   (negl (% imm0))
-  (addl ($ 32) (% imm0))		;remaining-bits = 32 - shift-count
+  (addl ($ 32) (% imm0))
   (movd (% imm0) (% mm6))		;remaining bits
-  (movl (@ result (% esp)) (% temp0))
-  (movl (@ bignum (% esp)) (% temp1))
-  (push (% arg_z))
-  (push (% arg_y))
-  (xorl (% arg_y) (% arg_y))		;i
-  (jmp @test)
-  @loop
-  (movd (@ x8632::misc-data-offset (% temp1) (% arg_y)) (% mm0)) ;b[i]
-  (psrlq (% mm6) (% mm0))
-  (movd (@ (+ 4 x8632::misc-data-offset) (% temp1) (% arg_y)) (% mm1)) ;b[i+1]
-  (psllq (% mm7) (% mm1))
-  (por (% mm1) (% mm0))
-  (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_z))) ;r[j]
-  (addl ($ '1) (% arg_y))
-  (addl ($ '1) (% arg_z))
-  @test
-  (cmpl (@ (% esp)) (% j))		;pity res-len-1 can't stay in a reg
-  (jne @loop)
-  (add ($ '1) (% esp))			;discard pushed res-len-1
-  (movd (@ x8632::misc-data-offset (% temp1) (% arg_y)) (% mm0)) ;b[i]
-  (psrlq (% mm6) (% mm0))
-  (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_z))) ;r[j]
-  ;; reconstitute "digits" arg to bignum-ashift-left-unaligned
-  (pop (% arg_z))
-  (subl ($ '1) (% arg_z))
-  (movd (@ x8632::misc-data-offset (% temp1)) (% mm0)) ;b[0]
-  (psllq (% mm7) (% mm0))
-  (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_z))) ;b[digits]
-  (single-value-return 5))
+
+  (let ((rl-1 -16)
+	(r temp0)
+	(b temp1)
+	(i arg_y)
+	(i+1 imm0))
+    (movl (@ -8 (% ebp)) (% r))
+    (movl (@ -12 (% ebp)) (% b))
+    (xorl (% i) (% i))
+    (movl ($ '1) (% i+1))
+    ;; j (in arg_z) is already (1+ digits)
+    (jmp @test)
+    @loop
+    (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
+    (psrlq (% mm6) (% mm0))
+    (movd (@ x8632::misc-data-offset (% b) (% i+1)) (% mm1))
+    (psllq (% mm7) (% mm1))
+    (por (% mm1) (% mm0))
+    (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j)))
+    (movl (% i+1) (% i))
+    (addl ($ '1) (% i))
+    (addl ($ '1) (% j))
+    @test
+    (cmpl (@ rl-1 (% ebp)) (% j))
+    (jne @loop)
+    (movd (@ x8632::misc-data-offset (% b)) (% mm0))
+    (psllq (% mm7) (% mm0))
+    (movl (@ -20 (% ebp)) (% imm0))	;digits + 1 (that is, the original j)
+    (subl ($ '1) (% imm0))		;digits
+    (movd (% mm0) (@ x8632::misc-data-offset (% r) (% imm0)))
+    (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
+    (psrad (% mm6) (% mm0))
+    (addl ($ '1) (% imm0))		;original j again
+    (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j))))
+  (leave)
+  (ret))
 
 ;;; shift bignum right by i words plus nbits bits.
@@ -834,8 +849,82 @@
   (jmp-subprim .SPvalues))
 
+;;; transliterated from bignum-truncate-guess in l0-bignum64.lisp
+;;; this is not beautiful...
 (defx8632lapfunction truncate-guess-loop ((guess-h 16) (guess-l 12) (x 8)
 					  (xidx 4) #|(ra 0)|#
 					  (yptr arg_y) (yidx arg_z))
-  (int ($ 3)))
+  (movl (% ebp) (@ 20 (% esp)))
+  (leal (@ 20 (% esp)) (% ebp))
+  (popl (@ 4 (% ebp)))
+  (push (% arg_y))
+  (push (% arg_z))
+
+  (movl (@ -4 (% ebp)) (% temp0))	;guess-h
+  (movl (@ -8 (% ebp)) (% temp1))	;guess-l
+  (compose-digit temp0 temp1 imm0)
+  (movd (% imm0) (% mm0))		;save guess
+
+  (movd (@ (- x8632::misc-data-offset 0) (% yptr) (% yidx)) (% mm1)) ;y1 (high)
+  ;; (%multiply guess y1)
+  (pmuludq (% mm0) (% mm1))
+  ;; (%multiply guess y2)
+  (movd (@ (- x8632::misc-data-offset 4) (% yptr) (% yidx)) (% mm2)) ;y2 (low)
+  (pmuludq (% mm0) (% mm2))
+
+  (movl (@ -12 (% ebp)) (% temp0))	 ;x
+  (movl (@ -16 (% ebp)) (% arg_y))	 ;xidx
+  (mark-as-imm temp1)			 ;edx now unboxed
+
+  ;; (%subtract-with-borrow x-i-1 low-guess*y1 1)
+  (movl (@ (- x8632::misc-data-offset 4) (% temp0) (% arg_y)) (% edx)) ;x-i-1
+  (movd (% mm1) (% eax))		;low part of y1*guess
+  (subl (% eax) (% edx))
+  (movd (% edx) (% mm6))		;save middle digit
+  ;; (%subtract-with-borrow x-i high-guess*y1 borrow)
+  (movl (@ (- x8632::misc-data-offset 0) (% temp0) (% arg_y)) (% edx)) ;x-i
+  (movq (% mm1) (% mm3))
+  (psrlq ($ 32) (% mm3))		;get high part into low half
+  (movd (% mm3) (% eax))		;high part of y1*guess
+  (sbbl (% eax) (% edx))
+  (movd (% edx) (% mm7))		;save high digit
+  ;; guess is now either good, or one too large
+  (setc (%b arg_z.bh))			;save borrow (arg_z already tag-fixnum)
+  ;; if (and (= high-digit 0)
+  (test (% edx) (% edx))
+  (jne @return)
+  ;;         (or (> high-guess*y2 middle-digit)
+  (movq (% mm2) (% mm3))
+  (psrlq ($ 32) (% mm3))
+  (movd (% mm3) (% eax))		;high part of y2*guess
+  (movd (% mm6) (% edx))		;middle-digit
+  (cmpl (% edx) (% eax))
+  (jg @decrement)
+  ;;             (and (= middle-digit high-guess*y2)
+  (jne @decrement)
+  ;;                  (> low-guess*y2 x-i-2)
+  (movd (% mm2) (% eax))		;low part of y2*guess
+  (movl (@ (- x8632::misc-data-offset 8) (% temp0) (% arg_y)) (% edx)) ;x-i-2
+  (cmpl (% edx) (% eax))
+  (jg @decrement)
+  @return
+  (mark-as-node edx)
+  (leave)
+  (movl (% esp) (% temp0))
+  (movd (% mm0) (% imm0))
+  (shrl ($ 16) (% imm0))
+  (shll ($ x8632::fixnumshift) (% imm0)) ;high half
+  (push (% imm0))
+  (movd (% mm0) (% imm0))
+  (shll ($ 16) (% imm0))
+  (shrl ($ (- 16 x8632::fixnumshift)) (% imm0))
+  (push (% imm0))			;low half
+  (set-nargs 2)
+  (jmp-subprim .SPvalues)
+  @decrement
+  (movd (% mm0) (% imm0))		;guess
+  (btl ($ 8) (% temp0))			;restore state of carry flag
+  (sbb ($ 1) (% imm0))
+  (movd (% imm0) (% mm0))
+  (jmp @return))
 
 ;;; If x[i] = y[j], return the all ones digit (as two halves).
@@ -847,8 +936,8 @@
   (pop (% temp1))
   (discard-reserved-frame)
-  (push (% temp1))
-  (movl (% imm0) (% temp1))
-  (movl (@ (% temp0) (% temp1)) (% imm0)) ;x[i]
-  (cmpl (% imm0) (@ (% yptr) (% yidx)))	  ;y[j]
+  (push (% temp0))
+  (movl (% imm0) (% temp0))
+  (movl (@ x8632::misc-data-offset (% temp1) (% temp0)) (% imm0)) ;x[i]
+  (cmpl (% imm0) (@ x8632::misc-data-offset (% yptr) (% yidx)))	  ;y[j]
   (jne @more)
   (pushl ($ '#xffff))
@@ -859,6 +948,6 @@
   @more
   (mark-as-imm edx)			;aka temp1 (contains a fixnum)
-  (movl (@ -4 (% temp0) (% temp1)) (% eax)) ;low digit
-  (movl (@ (% temp0) (% temp1)) (% edx))    ;high digit
+  (movl (@ (- x8632::misc-data-offset 4) (% temp1) (% temp0)) (% eax)) ;low
+  (movl (@ x8632::misc-data-offset (% temp1) (% temp0)) (% edx))    ;high digit
   (divl (@ (% yptr) (% yidx)))
   (mark-as-node edx)
@@ -876,4 +965,5 @@
   (jmp-subprim .SPvalues))
 
+;;; x * y + carry
 (defx8632lapfunction %multiply-and-add-1 ((x-high 16)
 					  (x-low 12)
@@ -883,5 +973,42 @@
 					  (carry-in-high arg_y)
 					  (carry-in-low arg_z))
-  (int ($ 3)))
+  (movl (@ x-high (% esp)) (% temp0))
+  (movl (@ x-low (% esp)) (% temp1))
+  (compose-digit temp0 temp1 imm0)
+  (movd (% imm0) (% mm0))
+  (movl (@ y-high (% esp)) (% temp0))
+  (movl (@ y-low (% esp)) (% temp1))
+  (compose-digit temp0 temp1 imm0)
+  (movd (% imm0) (% mm1))
+  (pmuludq (% mm1) (% mm0))		;x * y
+  (compose-digit arg_y arg_z imm0)
+  (movd (% imm0) (% mm1))
+  (paddq (% mm1) (% mm0))		;add in carry digit
+  (movq (% mm0) (% mm1))
+  (psrlq ($ 32) (% mm1))		;resultant carry digit
+  ;; clean up stack
+  (pop (% temp0))
+  (addl ($ '6) (% esp))
+  (push (% temp0))
+  ;; return (values carry-h carry-l result-h result-l) 
+  (movl (% esp) (% temp0))
+  (movd (% mm1) (% imm0))
+  (shrl ($ 16) (% imm0))
+  (shll ($ x8632::fixnumshift) (% imm0)) ;carry-h
+  (push (% imm0))
+  (movd (% mm1) (% imm0))
+  (shll ($ 16) (% imm0))
+  (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;carry-l
+  (push (% imm0))
+  (movd (% mm0) (% imm0))
+  (shrl ($ 16) (% imm0))
+  (shll ($ x8632::fixnumshift) (% imm0)) ;result-h
+  (push (% imm0))
+  (movd (% mm0) (% imm0))
+  (shll ($ 16) (% imm0))
+  (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;result-l
+  (push (% imm0))
+  (set-nargs 4)
+  (jmp-subprim .SPvalues))
 
 ;;; Copy the limb SRC points to to where DEST points.
