source: release/1.4/source/level-0/X86/X8632/x8632-bignum.lisp

Last change on this file was 13075, checked in by R. Matthew Emerson, 15 years ago

Merge trunk changes r13066 through r13067.
(copyright notices)

File size: 38.8 KB
RevLine 
[13075]1;;; Copyright 2009 Clozure Associates
2;;; This file is part of Clozure CL.
3;;;
4;;; Clozure CL is licensed under the terms of the Lisp Lesser GNU
5;;; Public License , known as the LLGPL and distributed with Clozure
6;;; CL as the file "LICENSE". The LLGPL consists of a preamble and
7;;; the LGPL, which is distributed with Clozure CL as the file "LGPL".
8;;; Where these conflict, the preamble takes precedence.
9;;;
10;;; Clozure CL is referenced in the preamble as the "LIBRARY."
11;;;
12;;; The LLGPL is also available online at
13;;; http://opensource.franz.com/preamble.html
14
[7903]15(in-package "CCL")
16
17;;; %BIGNUM-REF needs to access bignums as obviously as possible, and it needs
18;;; to be able to return 32 bits somewhere no one looks for real objects.
19;;;
20;;; The easiest thing to do is to store the 32 raw bits in two fixnums
21;;; and return multiple values.
[7963]22;;;
[7903]23(defx8632lapfunction %bignum-ref ((bignum arg_y) (i arg_z))
[9003]24 (movl (% esp) (% temp0)) ;ptr to return addr on stack in temp0
[7914]25 (movzwl (@ (+ 2 x8632::misc-data-offset) (% bignum) (% i)) (% imm0))
26 (box-fixnum imm0 temp1)
27 (push (% temp1)) ;high
[7903]28 (movzwl (@ x8632::misc-data-offset (% bignum) (% i)) (% imm0))
[7914]29 (box-fixnum imm0 temp1)
30 (push (% temp1)) ;low
[7903]31 (set-nargs 2)
32 (jmp-subprim .SPvalues))
33
[8827]34(defx8632lapfunction %bignum-ref-hi ((bignum arg_y) (i arg_z))
[9606]35 (movzwl (@ (+ 2 x8632::misc-data-offset) (% bignum) (% i)) (% imm0))
36 (box-fixnum imm0 arg_z)
37 (single-value-return))
[8827]38
[7903]39;;; BIGNUM[I] := DIGIT[0]
40(defx8632lapfunction %set-digit ((bignum 4) #|(ra 0)|# (i arg_y) (digit arg_z))
41 (movl (@ bignum (% esp)) (% temp0))
42 (svref digit 0 imm0)
43 (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% i)))
44 (single-value-return 3))
45
46;;; Return the sign of bignum (0 or -1) as a fixnum
47(defx8632lapfunction %bignum-sign ((bignum arg_z))
48 (vector-length bignum imm0)
[7914]49 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
50 (sarl ($ 31) (% imm0)) ;propagate sign bit
[7903]51 (box-fixnum imm0 arg_z)
52 (single-value-return))
53
54;;; Count the sign bits in the most significant digit of bignum;
55;;; return fixnum count.
56(defx8632lapfunction %bignum-sign-bits ((bignum arg_z))
[7914]57 (vector-length bignum imm0)
58 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
[7903]59 (mark-as-imm temp0)
60 (movl (% imm0) (% temp0))
61 (notl (% imm0))
62 (testl (% temp0) (% temp0))
63 (js @wasneg)
64 (notl (% imm0))
65 @wasneg
66 (bsrl (% imm0) (% imm0))
67 (sete (% temp0.b))
68 (xorl ($ 31) (% imm0))
69 (addb (% temp0.b) (% imm0.b))
70 (box-fixnum imm0 arg_z)
71 (mark-as-node temp0)
72 (single-value-return))
73
74(defx8632lapfunction %digit-0-or-plusp ((bignum arg_y) (idx arg_z))
[7907]75 (movl (@ x8632::misc-data-offset (% bignum) (% idx)) (% imm0))
[10959]76 (movl ($ (target-nil-value)) (% temp0))
[7914]77 (leal (@ x8632::t-offset (% temp0)) (% arg_z))
[7907]78 (testl (% imm0) (% imm0))
[7914]79 (cmovll (% temp0) (% arg_z))
[7903]80 (single-value-return))
81
82;;; For oddp, evenp
83(defx8632lapfunction %bignum-oddp ((bignum arg_z))
[7907]84 (movl (@ x8632::misc-data-offset (% bignum)) (% imm0))
[10959]85 (movl ($ (target-nil-value)) (% temp0))
[7914]86 (leal (@ x8632::t-offset (% temp0)) (% arg_z))
87 (testb ($ 1) (% imm0.b))
88 (cmovzl (% temp0) (% arg_z))
[7903]89 (single-value-return))
90
91(defx8632lapfunction bignum-plusp ((bignum arg_z))
92 (vector-length bignum imm0)
[7914]93 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
[10959]94 (movl ($ (target-nil-value)) (% arg_z))
[9003]95 (lea (@ x8632::t-offset (% arg_z)) (% temp0))
[7914]96 (testl (% imm0) (% imm0))
[9003]97 (cmovnsl (% temp0) (% arg_z))
[7903]98 (single-value-return))
99
100(defx8632lapfunction bignum-minusp ((bignum arg_z))
101 (vector-length bignum imm0)
[7914]102 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
[10959]103 (movl ($ (target-nil-value)) (% arg_z))
[9003]104 (lea (@ x8632::t-offset (% arg_z)) (% temp0))
[7907]105 (testl (% imm0) (% imm0))
[9003]106 (cmovsl (% temp0) (% arg_z))
[7903]107 (single-value-return))
108
[7914]109;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum,
110;;; either 0 or 1). Store the result in R[K], and return the outgoing
111;;; carry. If I is NIL, A is a fixnum. If J is NIL, B is a fixnum.
[7903]112(defx8632lapfunction %add-with-carry ((r 20) (k 16) (c 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
113 (mark-as-imm temp0)
[7963]114 (unbox-fixnum b imm0)
[10959]115 (cmpl ($ (target-nil-value)) (% j))
[7963]116 ;; if j not nil, get b[j]
117 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
[7903]118 (movl (@ a (% esp)) (% arg_y))
[7963]119 (unbox-fixnum arg_y temp0)
[7903]120 (movl (@ i (% esp)) (% arg_z))
[10959]121 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]122 ;; if i not nil, get a[i]
123 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
[9368]124 (xorl (% arg_z) (% arg_z))
[7963]125 ;; I can't think of a better way to set CF at the moment.
126 ;; NEG would be ideal, but we don't have a free imm reg.
127 (btl ($ x8632::fixnumshift) (@ c (% esp))) ;CF = lsb of carry fixnum
[7903]128 (adc (% temp0) (% imm0))
[9368]129 (setc (% arg_z.bh))
130 (sarl ($ (- 8 x8632::fixnumshift)) (% arg_z)) ;outgoing carry
[7963]131 (mark-as-node temp0)
[7903]132 (movl (@ r (% esp)) (% temp0))
133 (movl (@ k (% esp)) (% temp1))
134 (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
135 (single-value-return 7))
136
[7963]137;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum).
138;;; Store the result in R[K], and return the outgoing carry. If I is
139;;; NIL, A is a fixnum. If J is NIL, B is a fixnum.
140#+sse2
141(defx8632lapfunction %add-with-carry ((r 20) (k 16) (c 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
142 (let ((aa mm2)
143 (bb mm3)
144 (cc mm4))
145 (unbox-fixnum b imm0) ;assume j will be nil
[10959]146 (cmpl ($ (target-nil-value)) (% j))
[7963]147 ;; if j not nil, get b[j]
148 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
149 (movd (% imm0) (% bb))
150 (movl (@ a (% esp)) (% arg_y))
151 (movl (@ i (% esp)) (% arg_z))
152 (movl (@ c (% esp)) (% temp0))
153 (unbox-fixnum arg_y imm0) ;assume i will be nil
[10959]154 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]155 ;; if i not nil, get a[i]
156 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% imm0))
157 (movd (% imm0) (% aa))
158 (unbox-fixnum temp0 imm0)
159 (movd (% imm0) (% cc))
160 (paddq (% xx) (% yy))
161 (paddq (% cc) (% yy))
162 (movl (@ r (% esp)) (% temp0))
163 (movl (@ k (% esp)) (% temp1))
164 (movd (% yy) (@ x8632::misc-data-offset (% temp0) (% temp1)))
165 (psrlq ($ 32) (% yy)) ;carry bit
166 (movd (% yy) (% imm0))
167 (box-fixnum imm0 arg_z)
168 (single-value-return 7)))
169
[7903]170;;; Store the result of A[I] - B[J] - borrow into R[K], returning the borrow.
171;;; If I is NIL, A is a fixnum; likewise for J and B.
[7963]172;;;
173;;; (a - b) - (1 - borrow), or equivalently, (a - b) + borrow - 1
174;;;
175;;; Note: borrow is 1 for no borrow and 0 for a borrow.
[7903]176(defx8632lapfunction %subtract-with-borrow ((r 20) (k 16) (borrow 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
177 (mark-as-imm temp0)
[7963]178 (unbox-fixnum b imm0)
[10959]179 (cmpl ($ (target-nil-value)) (% j))
[7963]180 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
[7903]181 (movl (@ a (% esp)) (% arg_y))
[7963]182 (unbox-fixnum arg_y temp0)
[7903]183 (movl (@ i (% esp)) (% arg_z))
[10959]184 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]185 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
186 ;; unboxed a or a[i] in temp0, unboxed b or b[j] in imm0
187 (cmpl ($ '1) (@ borrow (% esp))) ;CF = 1 if borrow is 0 else CF = 0
[7903]188 (sbb (% imm0) (% temp0))
[7963]189 (movl ($ 1) (% imm0))
190 (sbb ($ 0) (% imm0))
191 (box-fixnum imm0 arg_z)
192 (movl (% temp0) (% imm0))
193 (mark-as-node temp0)
[7903]194 (movl (@ r (% esp)) (% temp0))
195 (movl (@ k (% esp)) (% temp1))
196 (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
197 (single-value-return 7))
[7914]198
[7963]199#+sse2
200(defx8632lapfunction %subtract-with-borrow ((r 20) (k 16) (borrow 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
201 (let ((aa mm2)
202 (bb mm3)
203 (ww mm4))
204 (unbox-fixnum b imm0)
[10959]205 (cmpl ($ (target-nil-value)) (% j))
[7963]206 ;; if j not nil, get b[j]
207 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
208 (movd (% imm0) (% bb))
209 (movl (@ a (% esp)) (% arg_y))
210 (movl (@ i (% esp)) (% arg_z))
211 (movl (@ borrow (% esp)) (% temp0))
212 (unbox-fixnum arg_y imm0)
[10959]213 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]214 ;; if i not nil, get a[i]
215 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% imm0))
216 (movd (% imm0) (% aa))
217 (unbox-fixnum temp0 imm0)
218 (subl ($ 1) (% imm0))
219 (movd (% imm0) (% ww))
220 (psubq (% bb) (% aa))
221 (paddq (% ww) (% aa))
222 (movl (@ r (% esp)) (% temp0))
223 (movl (@ k (% esp)) (% temp1))
224 (movd (% aa) (@ x8632::misc-data-offset (% temp0) (% temp1)))
225 (psrlq ($ 32) (% aa)) ;carry digit
226 (movd (% aa) (% imm0))
227 (xorl (% arg_z) (% arg_z))
228 (test ($ 1) (% imm0))
229 (cmovzl ($ '1) (% arg_z))
230 (single-value-return 7)))
231
232(defx8632lapfunction %subtract-one ((high arg_y) (low arg_z))
[10923]233 (shll ($ (- 16 x8632::fixnumshift)) (% arg_y))
[7963]234 (unbox-fixnum low imm0)
[10923]235 ;; high half should always be clear...
236 ;;(movzwl (% imm0.w) (% imm0))
237 (orl (% arg_y) (% imm0))
238 (decl (% imm0))
[7987]239 (movl (% esp) (% temp0))
[10923]240 ;; extract and push high half
241 (movl ($ (- #x10000)) (% arg_y))
242 (andl (% imm0) (% arg_y))
243 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
244 (push (% arg_y))
245 ;; low half
246 (andl ($ #xffff) (% imm0))
247 (shll ($ x8632::fixnumshift) (% imm0))
248 (push (% imm0))
249 (set-nargs 2)
[7963]250 (jmp-subprim .SPvalues))
251
252;;; %SUBTRACT-WITH-BORROW -- Internal.
253;;;
254;;; This should be in assembler, and should not cons intermediate results. It
255;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
256;;; subtracting a possible incoming borrow.
257;;;
258;;; We really do: a - b - 1 + borrow, where borrow is either 0 or 1.
259;;;
260
261(defx8632lapfunction %subtract-with-borrow-1 ((a-h 12) (a-l 8) (b-h 4) #|(ra 0)|# (b-l arg_y) (borrow arg_z))
262 (mark-as-imm temp0)
263 (mark-as-imm temp1)
264 (unbox-fixnum b-l temp0)
265 (movl (@ b-h (% esp)) (% imm0))
[9624]266 (sarl ($ x8632::fixnumshift) (% imm0))
[7963]267 (shll ($ 16) (% imm0))
[9624]268 (orl (% imm0) (% temp0)) ;b in temp0
[7963]269 (movl (@ a-l (% esp)) (% temp1))
[9624]270 (sarl ($ x8632::fixnumshift) (% temp1))
[7963]271 (movl (@ a-h (% esp)) (% imm0))
[9624]272 (sarl ($ x8632::fixnumshift) (% imm0))
[7963]273 (shll ($ 16) (% imm0))
[9624]274 (orl (% imm0) (% temp1)) ;a in temp1
275
276 (unbox-fixnum borrow imm0)
277 (subl ($ 1) (% imm0)) ;sets carry appropriately
[7963]278 (sbbl (% temp0) (% temp1))
[9624]279 (setae (%b imm0)) ;resulting borrow (1 for no, 0 for yes)
[10923]280 (movzbl (%b imm0) (% imm0))
[7963]281 (box-fixnum imm0 arg_z)
[9624]282 (movl (% temp1) (% imm0))
283 (andl ($ (- #x10000)) (% imm0))
284 (shrl ($ (- 16 x8632::fixnumshift)) (% imm0))
[7987]285 (popl (% arg_y)) ;return address
286 (addl ($ '5) (% esp)) ;discard reserved frame & stack args
287 (pushl (% arg_y))
[7963]288 (push (% imm0)) ;high
[9624]289 (andl ($ #xffff) (% temp1))
[7963]290 (box-fixnum temp1 imm0)
291 (mark-as-node temp0)
292 (mark-as-node temp1)
293 (push (% imm0)) ;low
294 (push (% arg_z)) ;borrow
295 (set-nargs 3)
[7987]296 (leal (@ '3 (% esp)) (% temp0))
[7963]297 (jmp-subprim .SPvalues))
298
299
[7914]300;;; To normalize a bignum is to drop "trailing" digits which are
[7916]301;;; redundant sign information. When return-fixnum-p is non-nil, make
[7914]302;;; the resultant bignum into a fixnum if it fits.
303(defx8632lapfunction %normalize-bignum-2 ((return-fixnum-p arg_y) (bignum arg_z))
304 (push (% return-fixnum-p))
305 (mark-as-imm temp0)
306 (mark-as-imm temp1)
307 (let ((len arg_y)
308 (sign temp0)
309 (next temp1))
310 (vector-length bignum len)
311 (cmpl ($ '1) (% len))
[7916]312 (jle @maybe-return-fixnum)
313 ;; Zero trailing sign digits.
314 (push (% len))
[7914]315 ;; next-to-last digit
316 (movl (@ (- x8632::misc-data-offset 8) (% bignum) (% len)) (% next))
317 ;; last digit
318 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% len)) (% sign))
319 (jmp @test)
320 @loop
321 (subl ($ '1) (% len))
322 (movl ($ 0) (@ x8632::misc-data-offset (% bignum) (% len)))
323 (cmpl ($ '1) (% len)) ;any more digits?
[7916]324 (je @adjust-length)
[7914]325 (movl (% next) (% sign))
[7963]326 ;; (bignum-ref bignum (- len 2))
[7915]327 (movl (@ (- x8632::misc-data-offset 8) (% bignum) (% len)) (% next))
[7914]328 @test
329 (movl (% next) (% imm0))
330 (sarl ($ 31) (% imm0)) ;propagate sign bit
[7963]331 (xorl (% sign) (% imm0)) ;whole digit only sign?
[7914]332 (jz @loop)
[7916]333 ;; New length now in len.
334 @adjust-length
335 (pop (% imm0)) ;original length
336 (cmpl (% len) (% imm0))
337 ;; If the new length is the same as the original length, we know
[7963]338 ;; that the bignum is at least two digits long (because if it was
339 ;; shorter, we would have branched directly to
340 ;; @maybe-return-fixnum), and thus won't fit in a fixnum.
341 ;; Therefore, there's no need to do either of the tests at
342 ;; @maybe-return-fixnum.
[7916]343 (je @done)
344 (movl (% len) (% imm0))
345 (shll ($ (- x8632::num-subtag-bits x8632::fixnumshift)) (% imm0))
346 (movb ($ x8632::subtag-bignum) (% imm0.b))
347 (movl (% imm0) (@ x8632::misc-header-offset (% bignum)))
348 @maybe-return-fixnum
349 ;; could use SETcc here to avoid one branch
[10959]350 (cmpl ($ (target-nil-value)) (@ 0 (% esp))) ;return-fixnum-p
[7914]351 (je @done)
[9567]352 (cmpl ($ x8632::one-digit-bignum-header)
353 (@ x8632::misc-header-offset (% bignum)))
[7916]354 (jne @done)
[7914]355 ;; Bignum has one digit. If it fits in a fixnum, return a fixnum.
356 (movl (@ x8632::misc-data-offset (% bignum)) (% imm0))
357 (box-fixnum imm0 arg_y)
[7916]358 (unbox-fixnum arg_y temp0)
359 (cmpl (% temp0) (% imm0))
360 (cmovel (% arg_y) (% arg_z))
[7914]361 @done
[7916]362 (pop (% imm0)) ;discard saved return-fixnum-p
[7914]363 (mark-as-node temp0)
364 (mark-as-node temp1)
[7963]365 (single-value-return)))
366
367;;; Multiply X[I] by the unboxed value of the (non-negative) fixnum Y;
368;;; add the incoming carry from CARRY[0] to the 64-bit product. Store
369;;; the low word of the 64-bit sum in R[0] and the high word in
370;;; CARRY[0].
371(defx8632lapfunction %multiply-and-add ((r 12) (carry 8) (x 4) #|(ra 0)|# (i arg_y) (y arg_z))
372 (let ((xx mm2)
373 (yy mm3)
374 (cc mm4))
375 (movl (@ x (% esp)) (% imm0))
376 (movd (@ x8632::misc-data-offset (% imm0) (% i)) (% xx))
377 (unbox-fixnum y imm0)
378 (movd (% imm0) (% yy))
379 (pmuludq (% xx) (% yy)) ;64 bit product
380 (movl (@ carry (% esp)) (% arg_y))
381 (movd (@ x8632::misc-data-offset (% arg_y)) (% cc))
382 (paddq (% cc) (% yy)) ;add in 32 bit carry digit
383 (movl (@ r (% esp)) (% arg_z))
384 (movd (% yy) (@ x8632::misc-data-offset (% arg_z)))
385 (psrlq ($ 32) (% yy))
386 (movd (% yy) (@ x8632::misc-data-offset (% arg_y)))
387 (single-value-return 5)))
388
389;; multiply x[i] by y and add to result starting at digit i
390(defx8632lapfunction %multiply-and-add-harder-loop-2
391 ((x 12) (y 8) (r 4) #|(ra 0)|# (i arg_y) (ylen arg_z))
392 (let ((cc mm2)
393 (xx mm3)
394 (yy mm4)
[9488]395 (rr mm5)
[7963]396 (j imm0))
397 (movl (@ x (% esp)) (% temp0))
398 (movd (@ x8632::misc-data-offset (% temp0) (% i)) (% xx)) ;x[i]
399 (movl (@ y (% esp)) (% temp0))
400 (movl (@ r (% esp)) (% temp1))
401 (pxor (% cc) (% cc))
402 (xorl (% j) (% j))
403 @loop
404 (movd (@ x8632::misc-data-offset (% temp0) (% j)) (% yy)) ;y[j]
405 (pmuludq (% xx) (% yy))
[9488]406 ;; 64-bit product now in %yy
407 (movd (@ x8632::misc-data-offset (% temp1) (% i)) (% rr))
408 ;; add in digit from r[i]
409 (paddq (% yy) (% rr))
410 ;; add in carry
411 (paddq (% cc) (% rr))
412 (movd (% rr) (@ x8632::misc-data-offset (% temp1) (% i))) ;update r[i]
413 (movq (% rr) (% cc))
414 (psrlq ($ 32) (% cc)) ;get carry digit into low word
[7963]415 (addl ($ '1) (% i))
416 (addl ($ '1) (% j))
417 (subl ($ '1) (% ylen))
418 (jg @loop)
419 (movd (% cc) (@ x8632::misc-data-offset (% temp1) (% i)))
420 (single-value-return 5)))
421
422;; this is silly
423(defx8632lapfunction %add-the-carry ((high 4) #|(ra 0)|# (low arg_y) (c arg_z))
424 (mark-as-imm temp0)
[7987]425 (let ((imm1 temp0)
426 (imm1.w temp0.w))
427 (pop (% temp1))
428 (popl (% imm1)) ;high
429 (discard-reserved-frame)
430 (push (% temp1))
431 (shll ($ (- 16 x8632::fixnumshift)) (% temp0))
432 (unbox-fixnum low imm0)
433 (orl (% imm0) (% imm1))
434 (unbox-fixnum c imm0)
435 (addl (% imm0) (% imm1))
436 (movzwl (% imm1.w) (% imm0))
437 (box-fixnum imm0 temp1)
438 (sarl ($ 16) (% imm1))
439 (shll ($ x8632::fixnumshift) (% imm1))
440 (push (% imm1)) ;high
441 (push (% temp1))) ;low
442 (mark-as-node temp0)
[7963]443 (set-nargs 2)
[7987]444 (leal (@ '2 (% esp)) (% temp0))
[7963]445 (jmp-subprim .SPvalues))
446
447(defx8632lapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
448 (let ((i arg_y)
449 (len temp0)
450 (zeros temp1))
451 (vector-length bignum temp0)
452 (xorl (% i) (% i))
[9605]453 (xorl (% zeros) (% zeros))
[7963]454 @loop
455 (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% imm0))
[9605]456 (testl (% imm0) (% imm0))
457 (jnz @last)
458 (addl ($ '32) (% zeros))
[7963]459 (addl ($ '1) (% i))
[9605]460 (cmpl (% len) (% i))
461 (jb @loop)
462 @last
[7963]463 ;; now count zero bits in digit
[9605]464 (bsfl (% imm0) (% imm0))
465 (shll ($ x8632::fixnumshift) (% imm0))
[7963]466 (addl (% imm0) (% zeros))
467 (movl (% zeros) (% arg_z))
468 (single-value-return)))
469
470;;; dest[i] = (logand x[i] y[i])
471(defx8632lapfunction %bignum-logand ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
472 (let ((i temp0)
473 (xx temp1)
474 (yy arg_y))
475 (movl (@ idx (% esp)) (% i))
476 (movl (@ x (% esp)) (% xx))
477 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
478 (andl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
479 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
480 (single-value-return 4)))
481
482;;; dest[i] = (logandc1 x[i] y[i])
483(defx8632lapfunction %bignum-logandc1 ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
484 (let ((i temp0)
485 (xx temp1)
486 (yy arg_y))
487 (movl (@ idx (% esp)) (% i))
488 (movl (@ x (% esp)) (% xx))
489 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
490 (not (% imm0))
491 (andl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
492 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
493 (single-value-return 4)))
494
495;;; dest[i] = (logandc2 x[i] y[i])
496(defx8632lapfunction %bignum-logandc2 ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
497 (let ((i temp0)
498 (xx temp1)
499 (yy arg_y))
500 (movl (@ idx (% esp)) (% i))
501 (movl (@ x (% esp)) (% xx))
502 (movl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
503 (not (% imm0))
504 (andl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
505 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
506 (single-value-return 4)))
507
508;;; dest[i] = (logior x[i] y[i])
509(defx8632lapfunction %bignum-logior ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
510 (let ((i temp0)
511 (xx temp1)
512 (yy arg_y))
513 (movl (@ idx (% esp)) (% i))
514 (movl (@ x (% esp)) (% xx))
515 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
516 (orl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
517 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
518 (single-value-return 4)))
519
520;;; dest[i] = (lognot x[i])
[8213]521(defx8632lapfunction %bignum-lognot ((idx 4) #|(ra 0)|# (x arg_y) (dest arg_z))
[7963]522 (let ((i temp0))
523 (movl (@ idx (% esp)) (% i))
524 (movl (@ x8632::misc-data-offset (% x) (% i)) (% imm0))
525 (not (% imm0))
526 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
527 (single-value-return 3)))
528
529;;; dest[i] = (logxor x[i] y[i])
530(defx8632lapfunction %bignum-logxor ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
531 (let ((i temp0)
532 (xx temp1)
533 (yy arg_y))
534 (movl (@ idx (% esp)) (% i))
535 (movl (@ x (% esp)) (% xx))
536 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
537 (xorl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
538 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
539 (single-value-return 4)))
540
[9269]541;;; 0 if a[i] = b[i]; 1 if a[i] > b[i]; -1 if a[i] < b[i]
[7963]542(defx8632lapfunction %compare-digits ((a 4) #|(ra 0)|# (b arg_y) (i arg_z))
543 (movl (@ a (% esp)) (% temp0))
544 (movl (@ x8632::misc-data-offset (% temp0) (% i)) (% imm0))
[9269]545 (movl ($ '1) (% temp0))
546 (movl ($ '-1) (% temp1))
547 (subl (@ x8632::misc-data-offset (% b) (% i)) (% imm0))
[9776]548 (cmoval (% temp0) (% imm0))
549 (cmovbl (% temp1) (% imm0))
[9269]550 (movl (% imm0) (% arg_z))
[7963]551 (single-value-return 3))
552
553;; returns number of bits in digit-hi,digit-lo that are sign bits
554;; 32 - digits-sign-bits is integer-length
555(defx8632lapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
556 (mark-as-imm temp0)
557 (shll ($ (- 16 x8632::fixnumshift)) (% hi))
558 (unbox-fixnum lo imm0)
559 (orl (% hi) (% imm0))
560 (movl (% imm0) (% temp0))
561 (not (% imm0))
562 (testl (% temp0) (% temp0))
563 (js @wasneg)
564 (not (% imm0))
565 @wasneg
566 (bsrl (% imm0) (% imm0))
567 (sete (% temp0.b))
568 (xorl ($ 31) (% imm0))
569 (addb (% temp0.b) (% imm0.b))
570 (box-fixnum imm0 arg_z)
571 (mark-as-node temp0)
572 (single-value-return))
573
[9400]574(defx8632lapfunction macptr->fixnum ((ptr arg_z))
575 (macptr-ptr arg_z ptr)
576 (single-value-return))
577
[7963]578; if dest not nil store unboxed result in dest(0), else return a fixnum
579(defx8632lapfunction fix-digit-logandc2 ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
580 (mark-as-imm temp0)
581 (movl (@ fix (% esp)) (% temp0))
582 (unbox-fixnum temp0 temp0)
583 (movl (@ x8632::misc-data-offset (% big)) (% imm0))
584 (not (% imm0))
585 (andl (% temp0) (% imm0))
586 (mark-as-node temp0)
[10959]587 (cmpl ($ (target-nil-value)) (% dest))
[7963]588 (jne @store)
589 (box-fixnum imm0 arg_z)
590 (single-value-return 3)
591 @store
592 (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
593 (single-value-return 3))
594
[9368]595(defx8632lapfunction fix-digit-logandc1 ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
596 (mark-as-imm temp0)
597 (movl (@ fix (% esp)) (% temp0))
598 (unbox-fixnum temp0 temp0)
599 (movl (@ x8632::misc-data-offset (% big)) (% imm0))
600 (not (% temp0))
601 (andl (% temp0) (% imm0))
602 (mark-as-node temp0)
[10959]603 (cmpl ($ (target-nil-value)) (% dest))
[9368]604 (jne @store)
605 (box-fixnum imm0 arg_z)
606 (single-value-return 3)
607 @store
608 (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
609 (single-value-return 3))
610
[9174]611(defx8632lapfunction fix-digit-logand ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
612 (mark-as-imm temp0)
613 (movl (@ fix (% esp)) (% temp0))
614 (sarl ($ x8632::fixnumshift) (% temp0))
615 (movl (@ x8632::misc-data-offset (% big)) (% imm0))
616 (andl (% temp0) (% imm0))
617 (mark-as-node temp0)
[10959]618 (cmpl ($ (target-nil-value)) (% dest))
[9174]619 (jne @store)
620 (box-fixnum imm0 arg_z)
621 (single-value-return 3)
622 @store
623 (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
624 (single-value-return 3))
625
626
[7963]627(defx8632lapfunction digit-lognot-move ((index 4) #|(ra 0)|# (source arg_y) (dest arg_z))
628 (movl (@ index (% esp)) (% temp0))
629 (movl (@ x8632::misc-data-offset (% source) (% temp0)) (% imm0))
630 (not (% imm0))
631 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% temp0)))
632 (single-value-return 3))
633
634;; Add b to a starting at a[i]
635;; might want not to use SSE2 for this. use lea to update loop counter
636;; variables so that the flags don't get set.
637(defx8632lapfunction bignum-add-loop-+ ((i 8) (a 4) #|(ra 0)|# (b arg_y) (blen arg_z))
638 (let ((aa mm2)
639 (bb mm3)
640 (cc mm4))
641 (movl (@ a (% esp)) (% temp0))
642 (movl (@ i (% esp)) (% temp1))
643 (xorl (% imm0) (% imm0))
644 (pxor (% cc) (% cc))
645 @loop
646 (movd (@ x8632::misc-data-offset (% temp0) (% temp1)) (% aa))
647 (movd (@ x8632::misc-data-offset (% b) (% imm0)) (% bb))
648 (paddq (% bb) (% aa))
649 (paddq (% cc) (% aa))
650 (movd (% aa) (@ x8632::misc-data-offset (% temp0) (% temp1)))
651 (psrlq ($ 32) (% aa))
652 (movq (% aa) (% cc))
653 (addl ($ '1) (% temp1))
654 (addl ($ '1) (% imm0))
655 (subl ($ '1) (% blen))
656 (jg @loop)
657 ;; add in final carry
658 (movd (% cc) (% imm0))
659 (addl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
660 (single-value-return 4)))
661
662(defx8632lapfunction bignum-logtest-loop ((count 4) #|(ra 0)|# (s1 arg_y) (s2 arg_z))
663 (let ((i temp1)
664 (c temp0))
665 (movl (@ count (% esp)) (% c))
666 (xorl (% i) (% i))
667 @loop
668 (movl (@ x8632::misc-data-offset (% s1) (% i)) (% imm0))
669 (test (@ x8632::misc-data-offset (% s2) (% i)) (% imm0))
670 (jnz @true)
671 (addl ($ '1) (% i))
672 (cmpl (% i) (% c))
673 (jg @loop)
[10959]674 (movl ($ (target-nil-value)) (% arg_z))
[7963]675 (single-value-return 3)
676 @true
[10959]677 (movl ($ (target-t-value)) (% arg_z))
[7963]678 (single-value-return 3)))
679
[9552]680;;; shift bignum left by nbits bits (1 <= nbits < 32)
[9622]681;;; start storing into result at digit j
[9174]682(defx8632lapfunction bignum-shift-left-loop ((nbits 12) (result 8)
683 (bignum 4) #|(ra 0)|#
684 (res-len-1 arg_y) (j arg_z))
[9552]685 (movl (% ebp) (@ 16 (% esp)))
686 (leal (@ 16 (% esp)) (% ebp))
687 (popl (@ 4 (% ebp)))
688 (push (% arg_y)) ;ebp - 16
689 (push (% arg_z)) ;ebp - 20
690
691 (movl (@ -4 (% ebp)) (% imm0))
[9174]692 (sarl ($ x8632::fixnumshift) (% imm0))
693 (movd (% imm0) (% mm7)) ;shift count
[9489]694 (negl (% imm0))
[9552]695 (addl ($ 32) (% imm0))
[9174]696 (movd (% imm0) (% mm6)) ;remaining bits
[7963]697
[9552]698 (let ((rl-1 -16)
699 (r temp0)
700 (b temp1)
701 (i arg_y)
702 (i+1 imm0))
703 (movl (@ -8 (% ebp)) (% r))
704 (movl (@ -12 (% ebp)) (% b))
705 (xorl (% i) (% i))
706 (movl ($ '1) (% i+1))
707 ;; j (in arg_z) is already (1+ digits)
708 (jmp @test)
709 @loop
710 (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
711 (psrlq (% mm6) (% mm0))
712 (movd (@ x8632::misc-data-offset (% b) (% i+1)) (% mm1))
713 (psllq (% mm7) (% mm1))
714 (por (% mm1) (% mm0))
715 (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j)))
716 (movl (% i+1) (% i))
[9622]717 (addl ($ '1) (% i+1))
[9552]718 (addl ($ '1) (% j))
719 @test
720 (cmpl (@ rl-1 (% ebp)) (% j))
721 (jne @loop)
722 (movd (@ x8632::misc-data-offset (% b)) (% mm0))
723 (psllq (% mm7) (% mm0))
724 (movl (@ -20 (% ebp)) (% imm0)) ;digits + 1 (that is, the original j)
725 (subl ($ '1) (% imm0)) ;digits
726 (movd (% mm0) (@ x8632::misc-data-offset (% r) (% imm0)))
727 (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
728 (psrad (% mm6) (% mm0))
729 (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j))))
730 (leave)
731 (ret))
732
[9539]733;;; shift bignum right by i words plus nbits bits.
734(defx8632lapfunction bignum-shift-right-loop-1 ((nbits 12) (result 8)
735 (bignum 4) #|(ra 0)|#
736 (res-len-1 arg_y)
737 (i arg_z))
738 (movl (@ nbits (% esp)) (% imm0))
739 (sarl ($ x8632::fixnumshift) (% imm0))
740 (movd (% imm0) (% mm7)) ;shift count
741
742 (movl (@ result (% esp)) (% temp0))
743 (movl (@ bignum (% esp)) (% temp1))
744 (push (% res-len-1))
745 (xorl (% arg_y) (% arg_y)) ;index into result
746 (jmp @test)
747 @loop
748 (movq (@ x8632::misc-data-offset (% temp1) (% i)) (% mm0)) ;b[i+1] || b[i]
749 (psrlq (% mm7) (% mm0))
750 (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_y)))
751 (addl ($ '1) (% i))
752 (addl ($ '1) (% arg_y))
753 @test
754 (cmpl (@ (% esp)) (% arg_y)) ;compare to res-len-1
755 (jne @loop)
756 (addl ($ x8632::node-size) (% esp))
757 @finish
758 (movd (@ x8632::misc-data-offset (% temp1) (% i)) (% mm0)) ;last digit of b
759 (psrad (% mm7) (% mm0))
760 (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_y)))
761 (single-value-return 5))
762
[7963]763(defx8632lapfunction %logcount-complement ((bignum arg_y) (i arg_z))
764 (mark-as-imm temp0)
765 (let ((rshift imm0)
766 (temp temp0))
767 (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% rshift))
768 (notl (% rshift))
769 (xorl (% arg_z) (% arg_z))
770 (testl (% rshift) (% rshift))
771 (jmp @test)
772 @next
773 (lea (@ -1 (% rshift)) (% temp))
774 (and (% temp) (% rshift)) ;sets flags
775 (lea (@ '1 (% arg_z)) (% arg_z)) ;doesn't set flags
776 @test
777 (jne @next)
778 (mark-as-node temp0)
779 (single-value-return)))
780
781(defx8632lapfunction %logcount ((bignum arg_y) (i arg_z))
782 (mark-as-imm temp0)
783 (let ((rshift imm0)
784 (temp temp0))
785 (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% rshift))
786 (xorl (% arg_z) (% arg_z))
787 (testl (% rshift) (% rshift))
788 (jmp @test)
789 @next
790 (lea (@ -1 (% rshift)) (% temp))
791 (and (% temp) (% rshift)) ;sets flags
792 (lea (@ '1 (% arg_z)) (% arg_z)) ;doesn't set flags
793 @test
794 (jne @next)
795 (mark-as-node temp0)
796 (single-value-return)))
797
798
799;;; Divide bignum x by single digit y (passed as two halves).
800;;; The quotient in stored in q, and the remainder is returned
[9003]801;;; in two halves. (cf. Knuth, 4.3.1, exercise 16)
802(defx8632lapfunction %floor-loop-quo ((x 8) (res 4) #|(ra 0)|# (yhi arg_y) (ylo arg_z))
803 (compose-digit yhi ylo imm0)
[10575]804 (movl (% imm0) (:rcontext x8632::tcr.unboxed0))
[9003]805 (pop (% temp0))
806 (pop (% arg_z)) ;res
807 (pop (% arg_y)) ;x
808 (discard-reserved-frame)
809 (push (% temp0))
810 (mark-as-imm edx) ;aka temp1
811 (let ((bignum arg_y) ;bignum dividend
812 (result arg_z)) ;bignum result (quotient)
813 (xorl (% edx) (% edx))
814 (vector-length bignum temp0)
815 (jmp @next)
816 @loop
817 (movl (@ x8632::misc-data-offset (% bignum) (% temp0)) (% eax))
[10575]818 (divl (:rcontext x8632::tcr.unboxed0))
[9003]819 (movl (% eax) (@ x8632::misc-data-offset (% result) (% temp0)))
820 @next
821 (subl ($ '1) (% temp0))
822 (jge @loop))
823 (movl (% esp) (% temp0))
824 ;; extract and push high half of remainder
825 (movl ($ (- #x10000)) (% arg_y))
826 (andl (% edx) (% arg_y))
827 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
828 (push (% arg_y))
829 ;; extract and push low half
[9624]830 (andl ($ #xffff) (% edx))
831 (shll ($ x8632::fixnumshift) (% edx))
[9003]832 (push (% edx))
833 (mark-as-node edx)
834 (set-nargs 2)
835 (jmp-subprim .SPvalues))
[7963]836
[9003]837;;; For TRUNCATE-BY-FIXNUM et al.
838;;; Doesn't store quotient: just returns rem in 2 halves.
839;;; Could avoid using tcr.unboxed0 if it matters...
840(defx8632lapfunction %floor-loop-no-quo ((x 4) #|(ra 0)|# (yhi arg_y) (ylo arg_z))
841 (compose-digit yhi ylo imm0)
[10575]842 (movl (% imm0) (:rcontext x8632::tcr.unboxed0))
[9003]843 (pop (% temp0))
844 (pop (% arg_y))
845 (discard-reserved-frame)
846 (push (% temp0))
847 (mark-as-imm edx) ;aka temp1
848 (let ((bignum arg_y) ;bignum dividend
849 (result arg_z)) ;bignum result (quotient)
850 (xorl (% edx) (% edx))
851 (vector-length bignum temp0)
852 (jmp @next)
853 @loop
854 (movl (@ x8632::misc-data-offset (% bignum) (% temp0)) (% eax))
[10575]855 (divl (:rcontext x8632::tcr.unboxed0))
[9003]856 ;;(movl (% eax) (@ x8632::misc-data-offset (% result) (% temp0)))
857 @next
858 (subl ($ '1) (% temp0))
859 (jge @loop))
860 (movl (% esp) (% temp0))
861 ;; extract and push high half of remainder
862 (movl ($ (- #x10000)) (% arg_y))
863 (andl (% edx) (% arg_y))
864 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
865 (push (% arg_y))
866 ;; extract and push low half
[9624]867 (andl ($ #xffff) (% edx))
868 (shll ($ x8632::fixnumshift) (% edx))
[9003]869 (push (% edx))
870 (mark-as-node edx)
871 (set-nargs 2)
872 (jmp-subprim .SPvalues))
[7963]873
[9552]874;;; transliterated from bignum-truncate-guess in l0-bignum64.lisp
875;;; this is not beautiful...
[8827]876(defx8632lapfunction truncate-guess-loop ((guess-h 16) (guess-l 12) (x 8)
877 (xidx 4) #|(ra 0)|#
878 (yptr arg_y) (yidx arg_z))
[11080]879 (save-stackargs-frame 4)
[9552]880 (push (% arg_y))
881 (push (% arg_z))
[7963]882
[9552]883 (movl (@ -4 (% ebp)) (% temp0)) ;guess-h
884 (movl (@ -8 (% ebp)) (% temp1)) ;guess-l
885 (compose-digit temp0 temp1 imm0)
886 (movd (% imm0) (% mm0)) ;save guess
887
[11080]888 @loop
889 (movl (@ (% esp)) (% yidx))
890 (movl (@ 4 (% esp)) (% yptr))
[9552]891 (movd (@ (- x8632::misc-data-offset 0) (% yptr) (% yidx)) (% mm1)) ;y1 (high)
892 ;; (%multiply guess y1)
893 (pmuludq (% mm0) (% mm1))
894 ;; (%multiply guess y2)
895 (movd (@ (- x8632::misc-data-offset 4) (% yptr) (% yidx)) (% mm2)) ;y2 (low)
896 (pmuludq (% mm0) (% mm2))
897
898 (movl (@ -12 (% ebp)) (% temp0)) ;x
899 (movl (@ -16 (% ebp)) (% arg_y)) ;xidx
900 (mark-as-imm temp1) ;edx now unboxed
901
902 ;; (%subtract-with-borrow x-i-1 low-guess*y1 1)
903 (movl (@ (- x8632::misc-data-offset 4) (% temp0) (% arg_y)) (% edx)) ;x-i-1
904 (movd (% mm1) (% eax)) ;low part of y1*guess
905 (subl (% eax) (% edx))
906 (movd (% edx) (% mm6)) ;save middle digit
907 ;; (%subtract-with-borrow x-i high-guess*y1 borrow)
908 (movl (@ (- x8632::misc-data-offset 0) (% temp0) (% arg_y)) (% edx)) ;x-i
909 (movq (% mm1) (% mm3))
910 (psrlq ($ 32) (% mm3)) ;get high part into low half
911 (movd (% mm3) (% eax)) ;high part of y1*guess
912 (sbbl (% eax) (% edx))
913 (movd (% edx) (% mm7)) ;save high digit
[11080]914 ;; see if guess is suitable
[9552]915 ;; if (and (= high-digit 0)
916 (test (% edx) (% edx))
917 (jne @return)
918 ;; (or (> high-guess*y2 middle-digit)
919 (movq (% mm2) (% mm3))
920 (psrlq ($ 32) (% mm3))
921 (movd (% mm3) (% eax)) ;high part of y2*guess
922 (movd (% mm6) (% edx)) ;middle-digit
923 (cmpl (% edx) (% eax))
[9797]924 (ja @decrement)
[9552]925 ;; (and (= middle-digit high-guess*y2)
[9624]926 (jne @return)
[9552]927 ;; (> low-guess*y2 x-i-2)
928 (movd (% mm2) (% eax)) ;low part of y2*guess
929 (movl (@ (- x8632::misc-data-offset 8) (% temp0) (% arg_y)) (% edx)) ;x-i-2
930 (cmpl (% edx) (% eax))
[9797]931 (ja @decrement)
[9552]932 @return
933 (mark-as-node edx)
934 (leave)
935 (movl (% esp) (% temp0))
936 (movd (% mm0) (% imm0))
937 (shrl ($ 16) (% imm0))
938 (shll ($ x8632::fixnumshift) (% imm0)) ;high half
939 (push (% imm0))
940 (movd (% mm0) (% imm0))
[9624]941 (andl ($ #xffff) (% imm0))
942 (shll ($ x8632::fixnumshift) (% imm0))
[9552]943 (push (% imm0)) ;low half
944 (set-nargs 2)
945 (jmp-subprim .SPvalues)
946 @decrement
947 (movd (% mm0) (% imm0)) ;guess
[9624]948 (subl ($ 1) (% imm0))
[9552]949 (movd (% imm0) (% mm0))
[11080]950 (jmp @loop))
[9552]951
[9488]952;;; If x[i] = y[j], return the all ones digit (as two halves).
953;;; Otherwise, compute floor x[i]x[i-1] / y[j].
[8827]954(defx8632lapfunction %floor-99 ((x-stk 8) (xidx 4) #|(ra 0)|#
955 (yptr arg_y) (yidx arg_z))
[10923]956 (pop (% temp1))
957 (pop (% imm0))
[9488]958 (pop (% temp0))
959 (discard-reserved-frame)
[10923]960 (push (% temp1))
961 (movl (% imm0) (% temp1))
962 (movl (@ x8632::misc-data-offset (% temp0) (% temp1)) (% imm0)) ;x[i]
[9552]963 (cmpl (% imm0) (@ x8632::misc-data-offset (% yptr) (% yidx))) ;y[j]
[9488]964 (jne @more)
965 (pushl ($ '#xffff))
966 (pushl ($ '#xffff))
967 (lea (@ '2 (% esp)) (% temp0))
968 (set-nargs 2)
969 (jmp-subprim .SPvalues)
970 @more
971 (mark-as-imm edx) ;aka temp1 (contains a fixnum)
[10923]972 (movl (@ (- x8632::misc-data-offset 4) (% temp0) (% temp1)) (% eax)) ;low
973 (movl (@ x8632::misc-data-offset (% temp0) (% temp1)) (% edx)) ;high digit
[9623]974 (divl (@ x8632::misc-data-offset (% yptr) (% yidx)))
[9488]975 (mark-as-node edx)
976 ;; extract and push high half of quotient
977 (movl ($ (- #x10000)) (% arg_y))
978 (andl (% eax) (% arg_y))
979 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
980 (push (% arg_y))
981 ;; extract and push low half
[9623]982 (andl ($ #xffff) (% eax))
[9488]983 (shll ($ x8632::fixnumshift) (% eax))
984 (push (% eax))
985 (set-nargs 2)
986 (lea (@ '2 (% esp)) (% temp0))
987 (jmp-subprim .SPvalues))
[8827]988
[9552]989;;; x * y + carry
[8827]990(defx8632lapfunction %multiply-and-add-1 ((x-high 16)
991 (x-low 12)
992 (y-high 8)
993 (y-low 4)
994 #|(ra 0)|#
995 (carry-in-high arg_y)
996 (carry-in-low arg_z))
[9552]997 (movl (@ x-high (% esp)) (% temp0))
998 (movl (@ x-low (% esp)) (% temp1))
999 (compose-digit temp0 temp1 imm0)
1000 (movd (% imm0) (% mm0))
1001 (movl (@ y-high (% esp)) (% temp0))
1002 (movl (@ y-low (% esp)) (% temp1))
1003 (compose-digit temp0 temp1 imm0)
1004 (movd (% imm0) (% mm1))
1005 (pmuludq (% mm1) (% mm0)) ;x * y
1006 (compose-digit arg_y arg_z imm0)
1007 (movd (% imm0) (% mm1))
1008 (paddq (% mm1) (% mm0)) ;add in carry digit
1009 (movq (% mm0) (% mm1))
1010 (psrlq ($ 32) (% mm1)) ;resultant carry digit
1011 ;; clean up stack
1012 (pop (% temp0))
1013 (addl ($ '6) (% esp))
1014 (push (% temp0))
1015 ;; return (values carry-h carry-l result-h result-l)
1016 (movl (% esp) (% temp0))
1017 (movd (% mm1) (% imm0))
1018 (shrl ($ 16) (% imm0))
1019 (shll ($ x8632::fixnumshift) (% imm0)) ;carry-h
1020 (push (% imm0))
1021 (movd (% mm1) (% imm0))
1022 (shll ($ 16) (% imm0))
1023 (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;carry-l
1024 (push (% imm0))
1025 (movd (% mm0) (% imm0))
1026 (shrl ($ 16) (% imm0))
1027 (shll ($ x8632::fixnumshift) (% imm0)) ;result-h
1028 (push (% imm0))
1029 (movd (% mm0) (% imm0))
1030 (shll ($ 16) (% imm0))
1031 (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;result-l
1032 (push (% imm0))
1033 (set-nargs 4)
1034 (jmp-subprim .SPvalues))
[8827]1035
1036;;; Copy the limb SRC points to to where DEST points.
1037(defx8632lapfunction copy-limb ((src arg_y) (dest arg_z))
1038 (int ($ 3)))
1039
1040;;; Return T iff LIMB contains 0.
1041(defx8632lapfunction limb-zerop ((limb arg_z))
1042 (int ($ 3)))
1043
1044;;; Return -1,0,1 according to whether the contents of Y are
1045;;; <,=,> the contents of Z.
1046(defx8632lapfunction compare-limbs ((y arg_y) (z arg_z))
1047 (int ($ 3)))
1048
1049;;; Add a fixnum to the limb LIMB points to. Ignore overflow.
1050(defx8632lapfunction add-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1051 (int ($ 3)))
1052
1053;;; Store a fixnum value where LIMB points.
1054(defx8632lapfunction copy-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1055 (int ($ 3)))
1056
1057;;; Increment a "LIMB VECTOR" (bignum) by a small amount. The caller
1058;;; knows that carries will only propagate for a word or two.
1059(defx8632lapfunction mpn-incr-u ((limb arg_y) (fixby arg_z))
1060 (int ($ 3)))
1061
1062;;; Store XP-YP at WP; return carry (0 or 1).
1063;;; wp, xp, yp: word-aligned, unboxed ptrs (fixnums)
1064;;; size: boxed fixnum
1065;;; returns boxed carry
1066(defx8632lapfunction mpn-sub-n ((wp 8) (xp 4) #|(ra 0)|#
1067 (yp arg_y) (size arg_z))
1068 (int ($ 3)))
1069
1070;;; Store XP+YP at WP; return carry (0 or 1).
1071;;; wp, xp, yp = word-aligned, unboxed macptrs (fixnums).
1072;;; size = boxed fixnum
1073;;; result = boxed carry
1074(defx8632lapfunction mpn-add-n ((wp 8) (xp 4) #|(ra 0)|#
1075 (yp arg_y) (size arg_z))
1076 (int ($ 3)))
1077
1078;;; Add the single limb LIMB to S1P (propagating carry.) Store the
1079;;; result at RP. RP and S1P may be the same place, so check for
1080;;; that and do nothing after carry stops propagating. Return carry.
1081(defx8632lapfunction mpn-add-1 ((rp-offset 8) (s1p 4) #|(ra 0)|#
1082 (size arg_y) (limb arg_z))
1083 (int ($ 3)))
1084
1085;;; Multiply the limb vector S1 by the single limb at LIMBPTR, storing
1086;;; the result at RES. Store the "carry out" (high word of last 64-bit
1087;;; partial product) at the limb RESULT.
1088;;; res, s1, limbptr, result:
1089;;; unboxed, word-aligned ptrs (fixnums). size: boxed fixnum
1090;;; It'd be hard to transliterate the GMP code here; the GMP version
1091;;; uses lots more immediate registers than we can easily use in LAP
1092;;; (and is much more aggressively pipelined).
1093(defx8632lapfunction mpn-mul-1 ((res-offset 12)
1094 (s1-offset 8)
1095 (size 4)
1096 #|(ra 0)|#
1097 (limbptr arg_y)
1098 (result arg_z))
1099 (int ($ 3)))
1100
1101;;; multiply s1*limb and add result to res
1102;;; res, s1, limbptr, result:
1103;;; unboxed, word-aligned ptrs (fixnums).
1104;;; size: boxed fixnum
1105;;; limbptr: source "limb".
1106;;; result: carry out (high word of product).
1107(defx8632lapfunction mpn-addmul-1 ((res-offset 12)
1108 (s1-offset 8)
1109 (size 4)
1110 #|(ra 0)|#
1111 (limbptr arg_y)
1112 (result arg_z))
1113 (int ($ 3)))
1114
1115;;; Multiply the UN-word limb vector at UP and the VN-word limb vector
1116;;; at VP, store the result at RP.
1117(defx8632lapfunction mpn-mul-basecase ((rp-offset 12)
1118 (up-offset 8)
1119 (un 4)
1120 #|(ra 0)|#
1121 (vp arg_y)
1122 (vn arg_z))
1123 (int ($ 3)))
1124
1125;;; left-shift src by 1 bit, storing result at res. Return
1126;;; the bit that was shifted out.
1127(defx8632lapfunction mpn-lshift-1 ((resptr 4) #|(ra 0)|#
1128 (s1ptr arg_y) (size-arg arg_z))
1129 (int ($ 3)))
1130
1131;;; Do a 32x32=64 unsigned multiply of the words at X and Y. Store
1132;;; result (low word first) at RESULT.
1133(defx8632lapfunction umulppm ((x 4) #|(ra 0)|# (y arg_y) (result arg_z))
1134 (int ($ 3)))
1135
1136(defx8632lapfunction %fixnum-to-bignum-set ((bignum arg_y) (fixnum arg_z))
1137 (unbox-fixnum fixnum imm0)
1138 (movl (% imm0) (@ x8632::misc-data-offset (% bignum)))
1139 (single-value-return))
[9003]1140
1141(defx8632lapfunction bignum-negate-loop-really ((bignum 4) #|(ra 0)|#
1142 (len arg_y) (result arg_z))
1143 (mark-as-imm edx) ;aka %temp1
1144 (unbox-fixnum arg_y edx)
1145 (movl (@ bignum (% esp)) (% arg_y))
1146 (xorl (% temp0) (% temp0))
1147 (stc)
1148 @loop
1149 (movl (@ x8632::misc-data-offset (% arg_y) (% temp0)) (% imm0))
1150 (not (% imm0))
1151 (adc ($ 0) (% imm0))
1152 (movl (% imm0) (@ x8632::misc-data-offset (% arg_z) (% temp0)))
1153 (lea (@ x8632::node-size (% temp0)) (% temp0))
1154 (decl (% edx)) ;preserves carry flag
1155 (jg @loop)
1156 ;; return carry
1157 (setc (% imm0.b))
1158 (movzbl (% imm0.b) (% imm0))
1159 (box-fixnum imm0 arg_z)
1160 (mark-as-node edx)
1161 (single-value-return 3))
1162
1163(defx8632lapfunction %bignum-set ((bignum 8) (i 4) #|(ra 0)|#
1164 (high arg_y) (low arg_z))
1165 (compose-digit high low imm0)
1166 (movl (@ bignum (% esp)) (% arg_z))
1167 (movl (@ i (% esp)) (% arg_y))
1168 (movl (% imm0) (@ x8632::misc-data-offset (% arg_z) (% arg_y)))
1169 (single-value-return 4))
1170
Note: See TracBrowser for help on using the repository browser.