Changeset 9552 for branches/ia32/level-0


Ignore:
Timestamp:
May 20, 2008, 6:01:33 AM (11 years ago)
Author:
rme
Message:

Redo bignum-shift-left-loop. Implement truncate-guess-loop,
%multiply-and-add-1.

Correct errors in %floor-99.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/ia32/level-0/X86/X8632/x8632-bignum.lisp

    r9539 r9552  
    654654    (single-value-return 3)))
    655655
     656;;; shift bignum left by nbits bits (1 <= nbits < 32)
     657;;; j is one more than the number of digits in bignum
    656658(defx8632lapfunction bignum-shift-left-loop ((nbits 12) (result 8)
    657659                                             (bignum 4) #|(ra 0)|#
    658660                                             (res-len-1 arg_y) (j arg_z))
    659   (movl (@ nbits (% esp)) (% imm0))
     661  (movl (% ebp) (@ 16 (% esp)))
     662  (leal (@ 16 (% esp)) (% ebp))
     663  (popl (@ 4 (% ebp)))
     664  (push (% arg_y))                      ;ebp - 16
     665  (push (% arg_z))                      ;ebp - 20
     666
     667  (movl (@ -4 (% ebp)) (% imm0))
    660668  (sarl ($ x8632::fixnumshift) (% imm0))
    661669  (movd (% imm0) (% mm7))               ;shift count
    662670  (negl (% imm0))
    663   (addl ($ 32) (% imm0))                ;remaining-bits = 32 - shift-count
     671  (addl ($ 32) (% imm0))
    664672  (movd (% imm0) (% mm6))               ;remaining bits
    665   (movl (@ result (% esp)) (% temp0))
    666   (movl (@ bignum (% esp)) (% temp1))
    667   (push (% arg_z))
    668   (push (% arg_y))
    669   (xorl (% arg_y) (% arg_y))            ;i
    670   (jmp @test)
    671   @loop
    672   (movd (@ x8632::misc-data-offset (% temp1) (% arg_y)) (% mm0)) ;b[i]
    673   (psrlq (% mm6) (% mm0))
    674   (movd (@ (+ 4 x8632::misc-data-offset) (% temp1) (% arg_y)) (% mm1)) ;b[i+1]
    675   (psllq (% mm7) (% mm1))
    676   (por (% mm1) (% mm0))
    677   (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_z))) ;r[j]
    678   (addl ($ '1) (% arg_y))
    679   (addl ($ '1) (% arg_z))
    680   @test
    681   (cmpl (@ (% esp)) (% j))              ;pity res-len-1 can't stay in a reg
    682   (jne @loop)
    683   (add ($ '1) (% esp))                  ;discard pushed res-len-1
    684   (movd (@ x8632::misc-data-offset (% temp1) (% arg_y)) (% mm0)) ;b[i]
    685   (psrlq (% mm6) (% mm0))
    686   (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_z))) ;r[j]
    687   ;; reconstitute "digits" arg to bignum-ashift-left-unaligned
    688   (pop (% arg_z))
    689   (subl ($ '1) (% arg_z))
    690   (movd (@ x8632::misc-data-offset (% temp1)) (% mm0)) ;b[0]
    691   (psllq (% mm7) (% mm0))
    692   (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_z))) ;b[digits]
    693   (single-value-return 5))
     673
     674  (let ((rl-1 -16)
     675        (r temp0)
     676        (b temp1)
     677        (i arg_y)
     678        (i+1 imm0))
     679    (movl (@ -8 (% ebp)) (% r))
     680    (movl (@ -12 (% ebp)) (% b))
     681    (xorl (% i) (% i))
     682    (movl ($ '1) (% i+1))
     683    ;; j (in arg_z) is already (1+ digits)
     684    (jmp @test)
     685    @loop
     686    (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
     687    (psrlq (% mm6) (% mm0))
     688    (movd (@ x8632::misc-data-offset (% b) (% i+1)) (% mm1))
     689    (psllq (% mm7) (% mm1))
     690    (por (% mm1) (% mm0))
     691    (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j)))
     692    (movl (% i+1) (% i))
     693    (addl ($ '1) (% i))
     694    (addl ($ '1) (% j))
     695    @test
     696    (cmpl (@ rl-1 (% ebp)) (% j))
     697    (jne @loop)
     698    (movd (@ x8632::misc-data-offset (% b)) (% mm0))
     699    (psllq (% mm7) (% mm0))
     700    (movl (@ -20 (% ebp)) (% imm0))     ;digits + 1 (that is, the original j)
     701    (subl ($ '1) (% imm0))              ;digits
     702    (movd (% mm0) (@ x8632::misc-data-offset (% r) (% imm0)))
     703    (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
     704    (psrad (% mm6) (% mm0))
     705    (addl ($ '1) (% imm0))              ;original j again
     706    (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j))))
     707  (leave)
     708  (ret))
    694709
    695710;;; shift bignum right by i words plus nbits bits.
     
    834849  (jmp-subprim .SPvalues))
    835850
     851;;; transliterated from bignum-truncate-guess in l0-bignum64.lisp
     852;;; this is not beautiful...
    836853(defx8632lapfunction truncate-guess-loop ((guess-h 16) (guess-l 12) (x 8)
    837854                                          (xidx 4) #|(ra 0)|#
    838855                                          (yptr arg_y) (yidx arg_z))
    839   (int ($ 3)))
     856  (movl (% ebp) (@ 20 (% esp)))
     857  (leal (@ 20 (% esp)) (% ebp))
     858  (popl (@ 4 (% ebp)))
     859  (push (% arg_y))
     860  (push (% arg_z))
     861
     862  (movl (@ -4 (% ebp)) (% temp0))       ;guess-h
     863  (movl (@ -8 (% ebp)) (% temp1))       ;guess-l
     864  (compose-digit temp0 temp1 imm0)
     865  (movd (% imm0) (% mm0))               ;save guess
     866
     867  (movd (@ (- x8632::misc-data-offset 0) (% yptr) (% yidx)) (% mm1)) ;y1 (high)
     868  ;; (%multiply guess y1)
     869  (pmuludq (% mm0) (% mm1))
     870  ;; (%multiply guess y2)
     871  (movd (@ (- x8632::misc-data-offset 4) (% yptr) (% yidx)) (% mm2)) ;y2 (low)
     872  (pmuludq (% mm0) (% mm2))
     873
     874  (movl (@ -12 (% ebp)) (% temp0))       ;x
     875  (movl (@ -16 (% ebp)) (% arg_y))       ;xidx
     876  (mark-as-imm temp1)                    ;edx now unboxed
     877
     878  ;; (%subtract-with-borrow x-i-1 low-guess*y1 1)
     879  (movl (@ (- x8632::misc-data-offset 4) (% temp0) (% arg_y)) (% edx)) ;x-i-1
     880  (movd (% mm1) (% eax))                ;low part of y1*guess
     881  (subl (% eax) (% edx))
     882  (movd (% edx) (% mm6))                ;save middle digit
     883  ;; (%subtract-with-borrow x-i high-guess*y1 borrow)
     884  (movl (@ (- x8632::misc-data-offset 0) (% temp0) (% arg_y)) (% edx)) ;x-i
     885  (movq (% mm1) (% mm3))
     886  (psrlq ($ 32) (% mm3))                ;get high part into low half
     887  (movd (% mm3) (% eax))                ;high part of y1*guess
     888  (sbbl (% eax) (% edx))
     889  (movd (% edx) (% mm7))                ;save high digit
     890  ;; guess is now either good, or one too large
     891  (setc (%b arg_z.bh))                  ;save borrow (arg_z already tag-fixnum)
     892  ;; if (and (= high-digit 0)
     893  (test (% edx) (% edx))
     894  (jne @return)
     895  ;;         (or (> high-guess*y2 middle-digit)
     896  (movq (% mm2) (% mm3))
     897  (psrlq ($ 32) (% mm3))
     898  (movd (% mm3) (% eax))                ;high part of y2*guess
     899  (movd (% mm6) (% edx))                ;middle-digit
     900  (cmpl (% edx) (% eax))
     901  (jg @decrement)
     902  ;;             (and (= middle-digit high-guess*y2)
     903  (jne @decrement)
     904  ;;                  (> low-guess*y2 x-i-2)
     905  (movd (% mm2) (% eax))                ;low part of y2*guess
     906  (movl (@ (- x8632::misc-data-offset 8) (% temp0) (% arg_y)) (% edx)) ;x-i-2
     907  (cmpl (% edx) (% eax))
     908  (jg @decrement)
     909  @return
     910  (mark-as-node edx)
     911  (leave)
     912  (movl (% esp) (% temp0))
     913  (movd (% mm0) (% imm0))
     914  (shrl ($ 16) (% imm0))
     915  (shll ($ x8632::fixnumshift) (% imm0)) ;high half
     916  (push (% imm0))
     917  (movd (% mm0) (% imm0))
     918  (shll ($ 16) (% imm0))
     919  (shrl ($ (- 16 x8632::fixnumshift)) (% imm0))
     920  (push (% imm0))                       ;low half
     921  (set-nargs 2)
     922  (jmp-subprim .SPvalues)
     923  @decrement
     924  (movd (% mm0) (% imm0))               ;guess
     925  (btl ($ 8) (% temp0))                 ;restore state of carry flag
     926  (sbb ($ 1) (% imm0))
     927  (movd (% imm0) (% mm0))
     928  (jmp @return))
    840929
    841930;;; If x[i] = y[j], return the all ones digit (as two halves).
     
    847936  (pop (% temp1))
    848937  (discard-reserved-frame)
    849   (push (% temp1))
    850   (movl (% imm0) (% temp1))
    851   (movl (@ (% temp0) (% temp1)) (% imm0)) ;x[i]
    852   (cmpl (% imm0) (@ (% yptr) (% yidx)))   ;y[j]
     938  (push (% temp0))
     939  (movl (% imm0) (% temp0))
     940  (movl (@ x8632::misc-data-offset (% temp1) (% temp0)) (% imm0)) ;x[i]
     941  (cmpl (% imm0) (@ x8632::misc-data-offset (% yptr) (% yidx)))   ;y[j]
    853942  (jne @more)
    854943  (pushl ($ '#xffff))
     
    859948  @more
    860949  (mark-as-imm edx)                     ;aka temp1 (contains a fixnum)
    861   (movl (@ -4 (% temp0) (% temp1)) (% eax)) ;low digit
    862   (movl (@ (% temp0) (% temp1)) (% edx))    ;high digit
     950  (movl (@ (- x8632::misc-data-offset 4) (% temp1) (% temp0)) (% eax)) ;low
     951  (movl (@ x8632::misc-data-offset (% temp1) (% temp0)) (% edx))    ;high digit
    863952  (divl (@ (% yptr) (% yidx)))
    864953  (mark-as-node edx)
     
    876965  (jmp-subprim .SPvalues))
    877966
     967;;; x * y + carry
    878968(defx8632lapfunction %multiply-and-add-1 ((x-high 16)
    879969                                          (x-low 12)
     
    883973                                          (carry-in-high arg_y)
    884974                                          (carry-in-low arg_z))
    885   (int ($ 3)))
     975  (movl (@ x-high (% esp)) (% temp0))
     976  (movl (@ x-low (% esp)) (% temp1))
     977  (compose-digit temp0 temp1 imm0)
     978  (movd (% imm0) (% mm0))
     979  (movl (@ y-high (% esp)) (% temp0))
     980  (movl (@ y-low (% esp)) (% temp1))
     981  (compose-digit temp0 temp1 imm0)
     982  (movd (% imm0) (% mm1))
     983  (pmuludq (% mm1) (% mm0))             ;x * y
     984  (compose-digit arg_y arg_z imm0)
     985  (movd (% imm0) (% mm1))
     986  (paddq (% mm1) (% mm0))               ;add in carry digit
     987  (movq (% mm0) (% mm1))
     988  (psrlq ($ 32) (% mm1))                ;resultant carry digit
     989  ;; clean up stack
     990  (pop (% temp0))
     991  (addl ($ '6) (% esp))
     992  (push (% temp0))
     993  ;; return (values carry-h carry-l result-h result-l)
     994  (movl (% esp) (% temp0))
     995  (movd (% mm1) (% imm0))
     996  (shrl ($ 16) (% imm0))
     997  (shll ($ x8632::fixnumshift) (% imm0)) ;carry-h
     998  (push (% imm0))
     999  (movd (% mm1) (% imm0))
     1000  (shll ($ 16) (% imm0))
     1001  (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;carry-l
     1002  (push (% imm0))
     1003  (movd (% mm0) (% imm0))
     1004  (shrl ($ 16) (% imm0))
     1005  (shll ($ x8632::fixnumshift) (% imm0)) ;result-h
     1006  (push (% imm0))
     1007  (movd (% mm0) (% imm0))
     1008  (shll ($ 16) (% imm0))
     1009  (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;result-l
     1010  (push (% imm0))
     1011  (set-nargs 4)
     1012  (jmp-subprim .SPvalues))
    8861013
    8871014;;; Copy the limb SRC points to to where DEST points.
Note: See TracChangeset for help on using the changeset viewer.