source: release/1.11/source/level-0/X86/X8632/x8632-bignum.lisp

Last change on this file was 16688, checked in by R. Matthew Emerson, 9 years ago

Merge copyright/license header changes to 1.11 release branch.

File size: 40.4 KB
RevLine 
[16688]1;;;
[13067]2;;; Copyright 2009 Clozure Associates
3;;;
[16688]4;;; Licensed under the Apache License, Version 2.0 (the "License");
5;;; you may not use this file except in compliance with the License.
6;;; You may obtain a copy of the License at
[13067]7;;;
[16688]8;;; http://www.apache.org/licenses/LICENSE-2.0
[13067]9;;;
[16688]10;;; Unless required by applicable law or agreed to in writing, software
11;;; distributed under the License is distributed on an "AS IS" BASIS,
12;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;;; See the License for the specific language governing permissions and
14;;; limitations under the License.
[13067]15
[7903]16(in-package "CCL")
17
18;;; %BIGNUM-REF needs to access bignums as obviously as possible, and it needs
19;;; to be able to return 32 bits somewhere no one looks for real objects.
20;;;
21;;; The easiest thing to do is to store the 32 raw bits in two fixnums
22;;; and return multiple values.
[7963]23;;;
[7903]24(defx8632lapfunction %bignum-ref ((bignum arg_y) (i arg_z))
[9003]25 (movl (% esp) (% temp0)) ;ptr to return addr on stack in temp0
[7914]26 (movzwl (@ (+ 2 x8632::misc-data-offset) (% bignum) (% i)) (% imm0))
27 (box-fixnum imm0 temp1)
28 (push (% temp1)) ;high
[7903]29 (movzwl (@ x8632::misc-data-offset (% bignum) (% i)) (% imm0))
[7914]30 (box-fixnum imm0 temp1)
31 (push (% temp1)) ;low
[7903]32 (set-nargs 2)
33 (jmp-subprim .SPvalues))
34
[8827]35(defx8632lapfunction %bignum-ref-hi ((bignum arg_y) (i arg_z))
[9606]36 (movzwl (@ (+ 2 x8632::misc-data-offset) (% bignum) (% i)) (% imm0))
37 (box-fixnum imm0 arg_z)
38 (single-value-return))
[8827]39
[7903]40;;; BIGNUM[I] := DIGIT[0]
41(defx8632lapfunction %set-digit ((bignum 4) #|(ra 0)|# (i arg_y) (digit arg_z))
42 (movl (@ bignum (% esp)) (% temp0))
43 (svref digit 0 imm0)
44 (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% i)))
45 (single-value-return 3))
46
47;;; Return the sign of bignum (0 or -1) as a fixnum
48(defx8632lapfunction %bignum-sign ((bignum arg_z))
49 (vector-length bignum imm0)
[7914]50 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
51 (sarl ($ 31) (% imm0)) ;propagate sign bit
[7903]52 (box-fixnum imm0 arg_z)
53 (single-value-return))
54
55;;; Count the sign bits in the most significant digit of bignum;
56;;; return fixnum count.
57(defx8632lapfunction %bignum-sign-bits ((bignum arg_z))
[7914]58 (vector-length bignum imm0)
59 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
[7903]60 (mark-as-imm temp0)
61 (movl (% imm0) (% temp0))
62 (notl (% imm0))
63 (testl (% temp0) (% temp0))
64 (js @wasneg)
65 (notl (% imm0))
66 @wasneg
67 (bsrl (% imm0) (% imm0))
68 (sete (% temp0.b))
69 (xorl ($ 31) (% imm0))
70 (addb (% temp0.b) (% imm0.b))
71 (box-fixnum imm0 arg_z)
72 (mark-as-node temp0)
73 (single-value-return))
74
75(defx8632lapfunction %digit-0-or-plusp ((bignum arg_y) (idx arg_z))
[7907]76 (movl (@ x8632::misc-data-offset (% bignum) (% idx)) (% imm0))
[10959]77 (movl ($ (target-nil-value)) (% temp0))
[7914]78 (leal (@ x8632::t-offset (% temp0)) (% arg_z))
[7907]79 (testl (% imm0) (% imm0))
[7914]80 (cmovll (% temp0) (% arg_z))
[7903]81 (single-value-return))
82
83;;; For oddp, evenp
84(defx8632lapfunction %bignum-oddp ((bignum arg_z))
[7907]85 (movl (@ x8632::misc-data-offset (% bignum)) (% imm0))
[10959]86 (movl ($ (target-nil-value)) (% temp0))
[7914]87 (leal (@ x8632::t-offset (% temp0)) (% arg_z))
88 (testb ($ 1) (% imm0.b))
89 (cmovzl (% temp0) (% arg_z))
[7903]90 (single-value-return))
91
92(defx8632lapfunction bignum-plusp ((bignum arg_z))
93 (vector-length bignum imm0)
[7914]94 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
[10959]95 (movl ($ (target-nil-value)) (% arg_z))
[9003]96 (lea (@ x8632::t-offset (% arg_z)) (% temp0))
[7914]97 (testl (% imm0) (% imm0))
[9003]98 (cmovnsl (% temp0) (% arg_z))
[7903]99 (single-value-return))
100
101(defx8632lapfunction bignum-minusp ((bignum arg_z))
102 (vector-length bignum imm0)
[7914]103 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% imm0)) (% imm0))
[10959]104 (movl ($ (target-nil-value)) (% arg_z))
[9003]105 (lea (@ x8632::t-offset (% arg_z)) (% temp0))
[7907]106 (testl (% imm0) (% imm0))
[9003]107 (cmovsl (% temp0) (% arg_z))
[7903]108 (single-value-return))
109
[7914]110;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum,
111;;; either 0 or 1). Store the result in R[K], and return the outgoing
112;;; carry. If I is NIL, A is a fixnum. If J is NIL, B is a fixnum.
[7903]113(defx8632lapfunction %add-with-carry ((r 20) (k 16) (c 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
114 (mark-as-imm temp0)
[7963]115 (unbox-fixnum b imm0)
[10959]116 (cmpl ($ (target-nil-value)) (% j))
[7963]117 ;; if j not nil, get b[j]
118 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
[7903]119 (movl (@ a (% esp)) (% arg_y))
[7963]120 (unbox-fixnum arg_y temp0)
[7903]121 (movl (@ i (% esp)) (% arg_z))
[10959]122 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]123 ;; if i not nil, get a[i]
124 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
[9368]125 (xorl (% arg_z) (% arg_z))
[7963]126 ;; I can't think of a better way to set CF at the moment.
127 ;; NEG would be ideal, but we don't have a free imm reg.
128 (btl ($ x8632::fixnumshift) (@ c (% esp))) ;CF = lsb of carry fixnum
[7903]129 (adc (% temp0) (% imm0))
[9368]130 (setc (% arg_z.bh))
131 (sarl ($ (- 8 x8632::fixnumshift)) (% arg_z)) ;outgoing carry
[7963]132 (mark-as-node temp0)
[7903]133 (movl (@ r (% esp)) (% temp0))
134 (movl (@ k (% esp)) (% temp1))
135 (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
136 (single-value-return 7))
137
[7963]138;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum).
139;;; Store the result in R[K], and return the outgoing carry. If I is
140;;; NIL, A is a fixnum. If J is NIL, B is a fixnum.
141#+sse2
142(defx8632lapfunction %add-with-carry ((r 20) (k 16) (c 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
143 (let ((aa mm2)
144 (bb mm3)
145 (cc mm4))
146 (unbox-fixnum b imm0) ;assume j will be nil
[10959]147 (cmpl ($ (target-nil-value)) (% j))
[7963]148 ;; if j not nil, get b[j]
149 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
150 (movd (% imm0) (% bb))
151 (movl (@ a (% esp)) (% arg_y))
152 (movl (@ i (% esp)) (% arg_z))
153 (movl (@ c (% esp)) (% temp0))
154 (unbox-fixnum arg_y imm0) ;assume i will be nil
[10959]155 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]156 ;; if i not nil, get a[i]
157 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% imm0))
158 (movd (% imm0) (% aa))
159 (unbox-fixnum temp0 imm0)
160 (movd (% imm0) (% cc))
161 (paddq (% xx) (% yy))
162 (paddq (% cc) (% yy))
163 (movl (@ r (% esp)) (% temp0))
164 (movl (@ k (% esp)) (% temp1))
165 (movd (% yy) (@ x8632::misc-data-offset (% temp0) (% temp1)))
166 (psrlq ($ 32) (% yy)) ;carry bit
167 (movd (% yy) (% imm0))
168 (box-fixnum imm0 arg_z)
169 (single-value-return 7)))
170
[7903]171;;; Store the result of A[I] - B[J] - borrow into R[K], returning the borrow.
172;;; If I is NIL, A is a fixnum; likewise for J and B.
[7963]173;;;
174;;; (a - b) - (1 - borrow), or equivalently, (a - b) + borrow - 1
175;;;
176;;; Note: borrow is 1 for no borrow and 0 for a borrow.
[7903]177(defx8632lapfunction %subtract-with-borrow ((r 20) (k 16) (borrow 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
178 (mark-as-imm temp0)
[7963]179 (unbox-fixnum b imm0)
[10959]180 (cmpl ($ (target-nil-value)) (% j))
[7963]181 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
[7903]182 (movl (@ a (% esp)) (% arg_y))
[7963]183 (unbox-fixnum arg_y temp0)
[7903]184 (movl (@ i (% esp)) (% arg_z))
[10959]185 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]186 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% temp0))
187 ;; unboxed a or a[i] in temp0, unboxed b or b[j] in imm0
188 (cmpl ($ '1) (@ borrow (% esp))) ;CF = 1 if borrow is 0 else CF = 0
[7903]189 (sbb (% imm0) (% temp0))
[7963]190 (movl ($ 1) (% imm0))
191 (sbb ($ 0) (% imm0))
192 (box-fixnum imm0 arg_z)
193 (movl (% temp0) (% imm0))
194 (mark-as-node temp0)
[7903]195 (movl (@ r (% esp)) (% temp0))
196 (movl (@ k (% esp)) (% temp1))
197 (movl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
198 (single-value-return 7))
[7914]199
[7963]200#+sse2
201(defx8632lapfunction %subtract-with-borrow ((r 20) (k 16) (borrow 12) (a 8) (i 4) #|(ra 0)|# (b arg_y) (j arg_z))
202 (let ((aa mm2)
203 (bb mm3)
204 (ww mm4))
205 (unbox-fixnum b imm0)
[10959]206 (cmpl ($ (target-nil-value)) (% j))
[7963]207 ;; if j not nil, get b[j]
208 (cmovnel (@ x8632::misc-data-offset (% b) (% j)) (% imm0))
209 (movd (% imm0) (% bb))
210 (movl (@ a (% esp)) (% arg_y))
211 (movl (@ i (% esp)) (% arg_z))
212 (movl (@ borrow (% esp)) (% temp0))
213 (unbox-fixnum arg_y imm0)
[10959]214 (cmpl ($ (target-nil-value)) (% arg_z))
[7963]215 ;; if i not nil, get a[i]
216 (cmovnel (@ x8632::misc-data-offset (% arg_y) (% arg_z)) (% imm0))
217 (movd (% imm0) (% aa))
218 (unbox-fixnum temp0 imm0)
219 (subl ($ 1) (% imm0))
220 (movd (% imm0) (% ww))
221 (psubq (% bb) (% aa))
222 (paddq (% ww) (% aa))
223 (movl (@ r (% esp)) (% temp0))
224 (movl (@ k (% esp)) (% temp1))
225 (movd (% aa) (@ x8632::misc-data-offset (% temp0) (% temp1)))
226 (psrlq ($ 32) (% aa)) ;carry digit
227 (movd (% aa) (% imm0))
228 (xorl (% arg_z) (% arg_z))
229 (test ($ 1) (% imm0))
230 (cmovzl ($ '1) (% arg_z))
231 (single-value-return 7)))
232
233(defx8632lapfunction %subtract-one ((high arg_y) (low arg_z))
[10923]234 (shll ($ (- 16 x8632::fixnumshift)) (% arg_y))
[7963]235 (unbox-fixnum low imm0)
[10923]236 ;; high half should always be clear...
237 ;;(movzwl (% imm0.w) (% imm0))
238 (orl (% arg_y) (% imm0))
239 (decl (% imm0))
[7987]240 (movl (% esp) (% temp0))
[10923]241 ;; extract and push high half
242 (movl ($ (- #x10000)) (% arg_y))
243 (andl (% imm0) (% arg_y))
244 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
245 (push (% arg_y))
246 ;; low half
247 (andl ($ #xffff) (% imm0))
248 (shll ($ x8632::fixnumshift) (% imm0))
249 (push (% imm0))
250 (set-nargs 2)
[7963]251 (jmp-subprim .SPvalues))
252
253;;; %SUBTRACT-WITH-BORROW -- Internal.
254;;;
255;;; This should be in assembler, and should not cons intermediate results. It
256;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
257;;; subtracting a possible incoming borrow.
258;;;
259;;; We really do: a - b - 1 + borrow, where borrow is either 0 or 1.
260;;;
261
262(defx8632lapfunction %subtract-with-borrow-1 ((a-h 12) (a-l 8) (b-h 4) #|(ra 0)|# (b-l arg_y) (borrow arg_z))
263 (mark-as-imm temp0)
264 (mark-as-imm temp1)
265 (unbox-fixnum b-l temp0)
266 (movl (@ b-h (% esp)) (% imm0))
[9624]267 (sarl ($ x8632::fixnumshift) (% imm0))
[7963]268 (shll ($ 16) (% imm0))
[9624]269 (orl (% imm0) (% temp0)) ;b in temp0
[7963]270 (movl (@ a-l (% esp)) (% temp1))
[9624]271 (sarl ($ x8632::fixnumshift) (% temp1))
[7963]272 (movl (@ a-h (% esp)) (% imm0))
[9624]273 (sarl ($ x8632::fixnumshift) (% imm0))
[7963]274 (shll ($ 16) (% imm0))
[9624]275 (orl (% imm0) (% temp1)) ;a in temp1
276
277 (unbox-fixnum borrow imm0)
278 (subl ($ 1) (% imm0)) ;sets carry appropriately
[7963]279 (sbbl (% temp0) (% temp1))
[9624]280 (setae (%b imm0)) ;resulting borrow (1 for no, 0 for yes)
[10923]281 (movzbl (%b imm0) (% imm0))
[7963]282 (box-fixnum imm0 arg_z)
[9624]283 (movl (% temp1) (% imm0))
284 (andl ($ (- #x10000)) (% imm0))
285 (shrl ($ (- 16 x8632::fixnumshift)) (% imm0))
[7987]286 (popl (% arg_y)) ;return address
287 (addl ($ '5) (% esp)) ;discard reserved frame & stack args
288 (pushl (% arg_y))
[7963]289 (push (% imm0)) ;high
[9624]290 (andl ($ #xffff) (% temp1))
[7963]291 (box-fixnum temp1 imm0)
292 (mark-as-node temp0)
293 (mark-as-node temp1)
294 (push (% imm0)) ;low
295 (push (% arg_z)) ;borrow
296 (set-nargs 3)
[7987]297 (leal (@ '3 (% esp)) (% temp0))
[7963]298 (jmp-subprim .SPvalues))
299
300
[7914]301;;; To normalize a bignum is to drop "trailing" digits which are
[7916]302;;; redundant sign information. When return-fixnum-p is non-nil, make
[7914]303;;; the resultant bignum into a fixnum if it fits.
304(defx8632lapfunction %normalize-bignum-2 ((return-fixnum-p arg_y) (bignum arg_z))
305 (push (% return-fixnum-p))
306 (mark-as-imm temp0)
307 (mark-as-imm temp1)
308 (let ((len arg_y)
309 (sign temp0)
310 (next temp1))
311 (vector-length bignum len)
312 (cmpl ($ '1) (% len))
[7916]313 (jle @maybe-return-fixnum)
314 ;; Zero trailing sign digits.
315 (push (% len))
[7914]316 ;; next-to-last digit
317 (movl (@ (- x8632::misc-data-offset 8) (% bignum) (% len)) (% next))
318 ;; last digit
319 (movl (@ (- x8632::misc-data-offset 4) (% bignum) (% len)) (% sign))
320 (jmp @test)
321 @loop
322 (subl ($ '1) (% len))
323 (movl ($ 0) (@ x8632::misc-data-offset (% bignum) (% len)))
324 (cmpl ($ '1) (% len)) ;any more digits?
[7916]325 (je @adjust-length)
[7914]326 (movl (% next) (% sign))
[7963]327 ;; (bignum-ref bignum (- len 2))
[7915]328 (movl (@ (- x8632::misc-data-offset 8) (% bignum) (% len)) (% next))
[7914]329 @test
330 (movl (% next) (% imm0))
331 (sarl ($ 31) (% imm0)) ;propagate sign bit
[7963]332 (xorl (% sign) (% imm0)) ;whole digit only sign?
[7914]333 (jz @loop)
[7916]334 ;; New length now in len.
335 @adjust-length
336 (pop (% imm0)) ;original length
337 (cmpl (% len) (% imm0))
338 ;; If the new length is the same as the original length, we know
[7963]339 ;; that the bignum is at least two digits long (because if it was
340 ;; shorter, we would have branched directly to
341 ;; @maybe-return-fixnum), and thus won't fit in a fixnum.
342 ;; Therefore, there's no need to do either of the tests at
343 ;; @maybe-return-fixnum.
[7916]344 (je @done)
345 (movl (% len) (% imm0))
346 (shll ($ (- x8632::num-subtag-bits x8632::fixnumshift)) (% imm0))
347 (movb ($ x8632::subtag-bignum) (% imm0.b))
348 (movl (% imm0) (@ x8632::misc-header-offset (% bignum)))
349 @maybe-return-fixnum
350 ;; could use SETcc here to avoid one branch
[10959]351 (cmpl ($ (target-nil-value)) (@ 0 (% esp))) ;return-fixnum-p
[7914]352 (je @done)
[9567]353 (cmpl ($ x8632::one-digit-bignum-header)
354 (@ x8632::misc-header-offset (% bignum)))
[7916]355 (jne @done)
[7914]356 ;; Bignum has one digit. If it fits in a fixnum, return a fixnum.
357 (movl (@ x8632::misc-data-offset (% bignum)) (% imm0))
358 (box-fixnum imm0 arg_y)
[7916]359 (unbox-fixnum arg_y temp0)
360 (cmpl (% temp0) (% imm0))
361 (cmovel (% arg_y) (% arg_z))
[7914]362 @done
[7916]363 (pop (% imm0)) ;discard saved return-fixnum-p
[7914]364 (mark-as-node temp0)
365 (mark-as-node temp1)
[7963]366 (single-value-return)))
367
368;;; Multiply X[I] by the unboxed value of the (non-negative) fixnum Y;
369;;; add the incoming carry from CARRY[0] to the 64-bit product. Store
370;;; the low word of the 64-bit sum in R[0] and the high word in
371;;; CARRY[0].
372(defx8632lapfunction %multiply-and-add ((r 12) (carry 8) (x 4) #|(ra 0)|# (i arg_y) (y arg_z))
373 (let ((xx mm2)
374 (yy mm3)
375 (cc mm4))
[15454]376 (movl (@ x (% esp)) (% temp0))
377 (movd (@ x8632::misc-data-offset (% temp0) (% i)) (% xx))
[7963]378 (unbox-fixnum y imm0)
379 (movd (% imm0) (% yy))
380 (pmuludq (% xx) (% yy)) ;64 bit product
381 (movl (@ carry (% esp)) (% arg_y))
382 (movd (@ x8632::misc-data-offset (% arg_y)) (% cc))
383 (paddq (% cc) (% yy)) ;add in 32 bit carry digit
384 (movl (@ r (% esp)) (% arg_z))
385 (movd (% yy) (@ x8632::misc-data-offset (% arg_z)))
386 (psrlq ($ 32) (% yy))
387 (movd (% yy) (@ x8632::misc-data-offset (% arg_y)))
388 (single-value-return 5)))
389
[15578]390
391(defx8632lapfunction %multiply-and-add-fixnum-loop ((len 8) (x 4) #||(ra 0)||# (y arg_y) (result arg_z))
392 (let ((savey mm0)
393 (savei mm1)
394 (carry temp0)
395 (i temp1)
396 (rx arg_y))
397 (unbox-fixnum y imm0)
398 (movd (% imm0) (% savey))
[15581]399 (mark-as-imm temp1)
400 (mark-as-imm temp0)
[15578]401 (mov (@ x (% esp)) (% rx))
402 (xorl (% carry) (% carry))
403 (xorl (% i) (% i))
404 @loop
405 (movd (% i) (% savei))
406 (movd (% savey) (% eax))
407 (mull (@ x8632::misc-data-offset (% rx) (% i)))
408 (addl (% carry) (% eax))
409 (adcl ($ 0) (% edx))
410 (movl (% edx) (% carry))
411 (movd (% savei) (% i))
412 (movl (% eax) (@ x8632::misc-data-offset (% result) (% i)))
413 (addl ($ 4) (% i))
414 (cmpl (% i) (@ len (% esp)))
415 (jne @loop)
416 (movl (% carry) (@ x8632::misc-data-offset (% result) (% i)))
417 (mark-as-node temp1)
418 (mark-as-node temp0)
419 (single-value-return 4)))
420
421
[7963]422;; multiply x[i] by y and add to result starting at digit i
423(defx8632lapfunction %multiply-and-add-harder-loop-2
424 ((x 12) (y 8) (r 4) #|(ra 0)|# (i arg_y) (ylen arg_z))
425 (let ((cc mm2)
426 (xx mm3)
427 (yy mm4)
[9488]428 (rr mm5)
[7963]429 (j imm0))
430 (movl (@ x (% esp)) (% temp0))
431 (movd (@ x8632::misc-data-offset (% temp0) (% i)) (% xx)) ;x[i]
432 (movl (@ y (% esp)) (% temp0))
433 (movl (@ r (% esp)) (% temp1))
434 (pxor (% cc) (% cc))
435 (xorl (% j) (% j))
436 @loop
437 (movd (@ x8632::misc-data-offset (% temp0) (% j)) (% yy)) ;y[j]
438 (pmuludq (% xx) (% yy))
[9488]439 ;; 64-bit product now in %yy
440 (movd (@ x8632::misc-data-offset (% temp1) (% i)) (% rr))
441 ;; add in digit from r[i]
442 (paddq (% yy) (% rr))
443 ;; add in carry
444 (paddq (% cc) (% rr))
445 (movd (% rr) (@ x8632::misc-data-offset (% temp1) (% i))) ;update r[i]
446 (movq (% rr) (% cc))
447 (psrlq ($ 32) (% cc)) ;get carry digit into low word
[7963]448 (addl ($ '1) (% i))
449 (addl ($ '1) (% j))
450 (subl ($ '1) (% ylen))
451 (jg @loop)
452 (movd (% cc) (@ x8632::misc-data-offset (% temp1) (% i)))
453 (single-value-return 5)))
454
455;; this is silly
456(defx8632lapfunction %add-the-carry ((high 4) #|(ra 0)|# (low arg_y) (c arg_z))
457 (mark-as-imm temp0)
[7987]458 (let ((imm1 temp0)
459 (imm1.w temp0.w))
460 (pop (% temp1))
461 (popl (% imm1)) ;high
462 (discard-reserved-frame)
463 (push (% temp1))
464 (shll ($ (- 16 x8632::fixnumshift)) (% temp0))
465 (unbox-fixnum low imm0)
466 (orl (% imm0) (% imm1))
467 (unbox-fixnum c imm0)
468 (addl (% imm0) (% imm1))
469 (movzwl (% imm1.w) (% imm0))
470 (box-fixnum imm0 temp1)
471 (sarl ($ 16) (% imm1))
472 (shll ($ x8632::fixnumshift) (% imm1))
473 (push (% imm1)) ;high
474 (push (% temp1))) ;low
475 (mark-as-node temp0)
[7963]476 (set-nargs 2)
[7987]477 (leal (@ '2 (% esp)) (% temp0))
[7963]478 (jmp-subprim .SPvalues))
479
480(defx8632lapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
481 (let ((i arg_y)
482 (len temp0)
483 (zeros temp1))
484 (vector-length bignum temp0)
485 (xorl (% i) (% i))
[9605]486 (xorl (% zeros) (% zeros))
[7963]487 @loop
488 (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% imm0))
[9605]489 (testl (% imm0) (% imm0))
490 (jnz @last)
491 (addl ($ '32) (% zeros))
[7963]492 (addl ($ '1) (% i))
[9605]493 (cmpl (% len) (% i))
494 (jb @loop)
495 @last
[7963]496 ;; now count zero bits in digit
[9605]497 (bsfl (% imm0) (% imm0))
498 (shll ($ x8632::fixnumshift) (% imm0))
[7963]499 (addl (% imm0) (% zeros))
500 (movl (% zeros) (% arg_z))
501 (single-value-return)))
502
503;;; dest[i] = (logand x[i] y[i])
504(defx8632lapfunction %bignum-logand ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
505 (let ((i temp0)
506 (xx temp1)
507 (yy arg_y))
508 (movl (@ idx (% esp)) (% i))
509 (movl (@ x (% esp)) (% xx))
510 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
511 (andl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
512 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
513 (single-value-return 4)))
514
515;;; dest[i] = (logandc1 x[i] y[i])
516(defx8632lapfunction %bignum-logandc1 ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
517 (let ((i temp0)
518 (xx temp1)
519 (yy arg_y))
520 (movl (@ idx (% esp)) (% i))
521 (movl (@ x (% esp)) (% xx))
522 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
523 (not (% imm0))
524 (andl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
525 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
526 (single-value-return 4)))
527
528;;; dest[i] = (logandc2 x[i] y[i])
529(defx8632lapfunction %bignum-logandc2 ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
530 (let ((i temp0)
531 (xx temp1)
532 (yy arg_y))
533 (movl (@ idx (% esp)) (% i))
534 (movl (@ x (% esp)) (% xx))
535 (movl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
536 (not (% imm0))
537 (andl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
538 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
539 (single-value-return 4)))
540
541;;; dest[i] = (logior x[i] y[i])
542(defx8632lapfunction %bignum-logior ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
543 (let ((i temp0)
544 (xx temp1)
545 (yy arg_y))
546 (movl (@ idx (% esp)) (% i))
547 (movl (@ x (% esp)) (% xx))
548 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
549 (orl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
550 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
551 (single-value-return 4)))
552
553;;; dest[i] = (lognot x[i])
[8213]554(defx8632lapfunction %bignum-lognot ((idx 4) #|(ra 0)|# (x arg_y) (dest arg_z))
[7963]555 (let ((i temp0))
556 (movl (@ idx (% esp)) (% i))
557 (movl (@ x8632::misc-data-offset (% x) (% i)) (% imm0))
558 (not (% imm0))
559 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
560 (single-value-return 3)))
561
562;;; dest[i] = (logxor x[i] y[i])
563(defx8632lapfunction %bignum-logxor ((idx 8) (x 4) #|(ra 0)|# (y arg_y) (dest arg_z))
564 (let ((i temp0)
565 (xx temp1)
566 (yy arg_y))
567 (movl (@ idx (% esp)) (% i))
568 (movl (@ x (% esp)) (% xx))
569 (movl (@ x8632::misc-data-offset (% xx) (% i)) (% imm0))
570 (xorl (@ x8632::misc-data-offset (% yy) (% i)) (% imm0))
571 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% i)))
572 (single-value-return 4)))
573
[9269]574;;; 0 if a[i] = b[i]; 1 if a[i] > b[i]; -1 if a[i] < b[i]
[7963]575(defx8632lapfunction %compare-digits ((a 4) #|(ra 0)|# (b arg_y) (i arg_z))
576 (movl (@ a (% esp)) (% temp0))
577 (movl (@ x8632::misc-data-offset (% temp0) (% i)) (% imm0))
[9269]578 (movl ($ '1) (% temp0))
579 (movl ($ '-1) (% temp1))
580 (subl (@ x8632::misc-data-offset (% b) (% i)) (% imm0))
[9776]581 (cmoval (% temp0) (% imm0))
582 (cmovbl (% temp1) (% imm0))
[9269]583 (movl (% imm0) (% arg_z))
[7963]584 (single-value-return 3))
585
586;; returns number of bits in digit-hi,digit-lo that are sign bits
587;; 32 - digits-sign-bits is integer-length
588(defx8632lapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
589 (mark-as-imm temp0)
590 (shll ($ (- 16 x8632::fixnumshift)) (% hi))
591 (unbox-fixnum lo imm0)
592 (orl (% hi) (% imm0))
593 (movl (% imm0) (% temp0))
594 (not (% imm0))
595 (testl (% temp0) (% temp0))
596 (js @wasneg)
597 (not (% imm0))
598 @wasneg
599 (bsrl (% imm0) (% imm0))
600 (sete (% temp0.b))
601 (xorl ($ 31) (% imm0))
602 (addb (% temp0.b) (% imm0.b))
603 (box-fixnum imm0 arg_z)
604 (mark-as-node temp0)
605 (single-value-return))
606
[9400]607(defx8632lapfunction macptr->fixnum ((ptr arg_z))
608 (macptr-ptr arg_z ptr)
609 (single-value-return))
610
[15581]611;;; Return macptr's address as a fixnum, or NIL if that address
612;;; isn't aligned on a 64-bit boundary.
613(defx8632lapfunction %64-bit-aligned-macptr->fixnum ((ptr arg_z))
614 (macptr-ptr ptr imm0)
615 (movl ($ nil) (% arg_z))
616 (test ($ 7) (% arg_z))
617 (cmovel (% imm0) (% arg_z))
618 (single-value-return))
619
620(defx8632lapfunction %32-bit-aligned-macptr->fixnum ((ptr arg_z))
621 (macptr-ptr ptr imm0)
622 (movl ($ nil) (% arg_z))
623 (movl (% arg_z) (% arg_y))
624 (test ($ 3) (% imm0))
625 (jne @ret)
626 (movl (% imm0) (% arg_z))
627 (xorl (% arg_y) (% arg_y))
628 @ret
629 (movl (% esp) (% temp0))
630 (pushl (% arg_z))
631 (pushl (% arg_y))
632 (set-nargs 2)
633 (jmp-subprim .SPvalues))
634
[7963]635; if dest not nil store unboxed result in dest(0), else return a fixnum
636(defx8632lapfunction fix-digit-logandc2 ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
637 (mark-as-imm temp0)
638 (movl (@ fix (% esp)) (% temp0))
639 (unbox-fixnum temp0 temp0)
640 (movl (@ x8632::misc-data-offset (% big)) (% imm0))
641 (not (% imm0))
642 (andl (% temp0) (% imm0))
643 (mark-as-node temp0)
[10959]644 (cmpl ($ (target-nil-value)) (% dest))
[7963]645 (jne @store)
646 (box-fixnum imm0 arg_z)
647 (single-value-return 3)
648 @store
649 (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
650 (single-value-return 3))
651
[9368]652(defx8632lapfunction fix-digit-logandc1 ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
653 (mark-as-imm temp0)
654 (movl (@ fix (% esp)) (% temp0))
655 (unbox-fixnum temp0 temp0)
656 (movl (@ x8632::misc-data-offset (% big)) (% imm0))
657 (not (% temp0))
658 (andl (% temp0) (% imm0))
659 (mark-as-node temp0)
[10959]660 (cmpl ($ (target-nil-value)) (% dest))
[9368]661 (jne @store)
662 (box-fixnum imm0 arg_z)
663 (single-value-return 3)
664 @store
665 (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
666 (single-value-return 3))
667
[9174]668(defx8632lapfunction fix-digit-logand ((fix 4) #|(ra 0)|# (big arg_y) (dest arg_z)) ; index 0
669 (mark-as-imm temp0)
670 (movl (@ fix (% esp)) (% temp0))
671 (sarl ($ x8632::fixnumshift) (% temp0))
672 (movl (@ x8632::misc-data-offset (% big)) (% imm0))
673 (andl (% temp0) (% imm0))
674 (mark-as-node temp0)
[10959]675 (cmpl ($ (target-nil-value)) (% dest))
[9174]676 (jne @store)
677 (box-fixnum imm0 arg_z)
678 (single-value-return 3)
679 @store
680 (movl (% imm0) (@ x8632::misc-data-offset (% dest)))
681 (single-value-return 3))
682
683
[7963]684(defx8632lapfunction digit-lognot-move ((index 4) #|(ra 0)|# (source arg_y) (dest arg_z))
685 (movl (@ index (% esp)) (% temp0))
686 (movl (@ x8632::misc-data-offset (% source) (% temp0)) (% imm0))
687 (not (% imm0))
688 (movl (% imm0) (@ x8632::misc-data-offset (% dest) (% temp0)))
689 (single-value-return 3))
690
691;; Add b to a starting at a[i]
692;; might want not to use SSE2 for this. use lea to update loop counter
693;; variables so that the flags don't get set.
694(defx8632lapfunction bignum-add-loop-+ ((i 8) (a 4) #|(ra 0)|# (b arg_y) (blen arg_z))
695 (let ((aa mm2)
696 (bb mm3)
697 (cc mm4))
698 (movl (@ a (% esp)) (% temp0))
699 (movl (@ i (% esp)) (% temp1))
700 (xorl (% imm0) (% imm0))
701 (pxor (% cc) (% cc))
702 @loop
703 (movd (@ x8632::misc-data-offset (% temp0) (% temp1)) (% aa))
704 (movd (@ x8632::misc-data-offset (% b) (% imm0)) (% bb))
705 (paddq (% bb) (% aa))
706 (paddq (% cc) (% aa))
707 (movd (% aa) (@ x8632::misc-data-offset (% temp0) (% temp1)))
708 (psrlq ($ 32) (% aa))
709 (movq (% aa) (% cc))
710 (addl ($ '1) (% temp1))
711 (addl ($ '1) (% imm0))
712 (subl ($ '1) (% blen))
713 (jg @loop)
714 ;; add in final carry
715 (movd (% cc) (% imm0))
716 (addl (% imm0) (@ x8632::misc-data-offset (% temp0) (% temp1)))
717 (single-value-return 4)))
718
719(defx8632lapfunction bignum-logtest-loop ((count 4) #|(ra 0)|# (s1 arg_y) (s2 arg_z))
720 (let ((i temp1)
721 (c temp0))
722 (movl (@ count (% esp)) (% c))
723 (xorl (% i) (% i))
724 @loop
725 (movl (@ x8632::misc-data-offset (% s1) (% i)) (% imm0))
726 (test (@ x8632::misc-data-offset (% s2) (% i)) (% imm0))
727 (jnz @true)
728 (addl ($ '1) (% i))
729 (cmpl (% i) (% c))
730 (jg @loop)
[10959]731 (movl ($ (target-nil-value)) (% arg_z))
[7963]732 (single-value-return 3)
733 @true
[10959]734 (movl ($ (target-t-value)) (% arg_z))
[7963]735 (single-value-return 3)))
736
[9552]737;;; shift bignum left by nbits bits (1 <= nbits < 32)
[9622]738;;; start storing into result at digit j
[9174]739(defx8632lapfunction bignum-shift-left-loop ((nbits 12) (result 8)
740 (bignum 4) #|(ra 0)|#
741 (res-len-1 arg_y) (j arg_z))
[9552]742 (movl (% ebp) (@ 16 (% esp)))
743 (leal (@ 16 (% esp)) (% ebp))
744 (popl (@ 4 (% ebp)))
745 (push (% arg_y)) ;ebp - 16
746 (push (% arg_z)) ;ebp - 20
747
748 (movl (@ -4 (% ebp)) (% imm0))
[9174]749 (sarl ($ x8632::fixnumshift) (% imm0))
750 (movd (% imm0) (% mm7)) ;shift count
[9489]751 (negl (% imm0))
[9552]752 (addl ($ 32) (% imm0))
[9174]753 (movd (% imm0) (% mm6)) ;remaining bits
[7963]754
[9552]755 (let ((rl-1 -16)
756 (r temp0)
757 (b temp1)
758 (i arg_y)
759 (i+1 imm0))
760 (movl (@ -8 (% ebp)) (% r))
761 (movl (@ -12 (% ebp)) (% b))
762 (xorl (% i) (% i))
763 (movl ($ '1) (% i+1))
764 ;; j (in arg_z) is already (1+ digits)
765 (jmp @test)
766 @loop
767 (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
768 (psrlq (% mm6) (% mm0))
769 (movd (@ x8632::misc-data-offset (% b) (% i+1)) (% mm1))
770 (psllq (% mm7) (% mm1))
771 (por (% mm1) (% mm0))
772 (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j)))
773 (movl (% i+1) (% i))
[9622]774 (addl ($ '1) (% i+1))
[9552]775 (addl ($ '1) (% j))
776 @test
777 (cmpl (@ rl-1 (% ebp)) (% j))
778 (jne @loop)
779 (movd (@ x8632::misc-data-offset (% b)) (% mm0))
780 (psllq (% mm7) (% mm0))
781 (movl (@ -20 (% ebp)) (% imm0)) ;digits + 1 (that is, the original j)
782 (subl ($ '1) (% imm0)) ;digits
783 (movd (% mm0) (@ x8632::misc-data-offset (% r) (% imm0)))
784 (movd (@ x8632::misc-data-offset (% b) (% i)) (% mm0))
785 (psrad (% mm6) (% mm0))
786 (movd (% mm0) (@ x8632::misc-data-offset (% r) (% j))))
787 (leave)
788 (ret))
789
[9539]790;;; shift bignum right by i words plus nbits bits.
791(defx8632lapfunction bignum-shift-right-loop-1 ((nbits 12) (result 8)
792 (bignum 4) #|(ra 0)|#
793 (res-len-1 arg_y)
794 (i arg_z))
795 (movl (@ nbits (% esp)) (% imm0))
796 (sarl ($ x8632::fixnumshift) (% imm0))
797 (movd (% imm0) (% mm7)) ;shift count
798
799 (movl (@ result (% esp)) (% temp0))
800 (movl (@ bignum (% esp)) (% temp1))
801 (push (% res-len-1))
802 (xorl (% arg_y) (% arg_y)) ;index into result
803 (jmp @test)
804 @loop
805 (movq (@ x8632::misc-data-offset (% temp1) (% i)) (% mm0)) ;b[i+1] || b[i]
806 (psrlq (% mm7) (% mm0))
807 (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_y)))
808 (addl ($ '1) (% i))
809 (addl ($ '1) (% arg_y))
810 @test
811 (cmpl (@ (% esp)) (% arg_y)) ;compare to res-len-1
812 (jne @loop)
813 (addl ($ x8632::node-size) (% esp))
814 @finish
815 (movd (@ x8632::misc-data-offset (% temp1) (% i)) (% mm0)) ;last digit of b
816 (psrad (% mm7) (% mm0))
817 (movd (% mm0) (@ x8632::misc-data-offset (% temp0) (% arg_y)))
818 (single-value-return 5))
819
[7963]820(defx8632lapfunction %logcount-complement ((bignum arg_y) (i arg_z))
821 (mark-as-imm temp0)
822 (let ((rshift imm0)
823 (temp temp0))
824 (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% rshift))
825 (notl (% rshift))
826 (xorl (% arg_z) (% arg_z))
827 (testl (% rshift) (% rshift))
828 (jmp @test)
829 @next
830 (lea (@ -1 (% rshift)) (% temp))
831 (and (% temp) (% rshift)) ;sets flags
832 (lea (@ '1 (% arg_z)) (% arg_z)) ;doesn't set flags
833 @test
834 (jne @next)
835 (mark-as-node temp0)
836 (single-value-return)))
837
838(defx8632lapfunction %logcount ((bignum arg_y) (i arg_z))
839 (mark-as-imm temp0)
840 (let ((rshift imm0)
841 (temp temp0))
842 (movl (@ x8632::misc-data-offset (% bignum) (% i)) (% rshift))
843 (xorl (% arg_z) (% arg_z))
844 (testl (% rshift) (% rshift))
845 (jmp @test)
846 @next
847 (lea (@ -1 (% rshift)) (% temp))
848 (and (% temp) (% rshift)) ;sets flags
849 (lea (@ '1 (% arg_z)) (% arg_z)) ;doesn't set flags
850 @test
851 (jne @next)
852 (mark-as-node temp0)
853 (single-value-return)))
854
855
856;;; Divide bignum x by single digit y (passed as two halves).
857;;; The quotient in stored in q, and the remainder is returned
[9003]858;;; in two halves. (cf. Knuth, 4.3.1, exercise 16)
859(defx8632lapfunction %floor-loop-quo ((x 8) (res 4) #|(ra 0)|# (yhi arg_y) (ylo arg_z))
860 (compose-digit yhi ylo imm0)
[10575]861 (movl (% imm0) (:rcontext x8632::tcr.unboxed0))
[9003]862 (pop (% temp0))
863 (pop (% arg_z)) ;res
864 (pop (% arg_y)) ;x
865 (discard-reserved-frame)
866 (push (% temp0))
867 (mark-as-imm edx) ;aka temp1
868 (let ((bignum arg_y) ;bignum dividend
869 (result arg_z)) ;bignum result (quotient)
870 (xorl (% edx) (% edx))
871 (vector-length bignum temp0)
872 (jmp @next)
873 @loop
874 (movl (@ x8632::misc-data-offset (% bignum) (% temp0)) (% eax))
[10575]875 (divl (:rcontext x8632::tcr.unboxed0))
[9003]876 (movl (% eax) (@ x8632::misc-data-offset (% result) (% temp0)))
877 @next
878 (subl ($ '1) (% temp0))
879 (jge @loop))
880 (movl (% esp) (% temp0))
881 ;; extract and push high half of remainder
882 (movl ($ (- #x10000)) (% arg_y))
883 (andl (% edx) (% arg_y))
884 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
885 (push (% arg_y))
886 ;; extract and push low half
[9624]887 (andl ($ #xffff) (% edx))
888 (shll ($ x8632::fixnumshift) (% edx))
[9003]889 (push (% edx))
890 (mark-as-node edx)
891 (set-nargs 2)
892 (jmp-subprim .SPvalues))
[7963]893
[9003]894;;; For TRUNCATE-BY-FIXNUM et al.
895;;; Doesn't store quotient: just returns rem in 2 halves.
896;;; Could avoid using tcr.unboxed0 if it matters...
897(defx8632lapfunction %floor-loop-no-quo ((x 4) #|(ra 0)|# (yhi arg_y) (ylo arg_z))
898 (compose-digit yhi ylo imm0)
[10575]899 (movl (% imm0) (:rcontext x8632::tcr.unboxed0))
[9003]900 (pop (% temp0))
901 (pop (% arg_y))
902 (discard-reserved-frame)
903 (push (% temp0))
904 (mark-as-imm edx) ;aka temp1
905 (let ((bignum arg_y) ;bignum dividend
906 (result arg_z)) ;bignum result (quotient)
907 (xorl (% edx) (% edx))
908 (vector-length bignum temp0)
909 (jmp @next)
910 @loop
911 (movl (@ x8632::misc-data-offset (% bignum) (% temp0)) (% eax))
[10575]912 (divl (:rcontext x8632::tcr.unboxed0))
[9003]913 ;;(movl (% eax) (@ x8632::misc-data-offset (% result) (% temp0)))
914 @next
915 (subl ($ '1) (% temp0))
916 (jge @loop))
917 (movl (% esp) (% temp0))
918 ;; extract and push high half of remainder
919 (movl ($ (- #x10000)) (% arg_y))
920 (andl (% edx) (% arg_y))
921 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
922 (push (% arg_y))
923 ;; extract and push low half
[9624]924 (andl ($ #xffff) (% edx))
925 (shll ($ x8632::fixnumshift) (% edx))
[9003]926 (push (% edx))
927 (mark-as-node edx)
928 (set-nargs 2)
929 (jmp-subprim .SPvalues))
[7963]930
[9552]931;;; transliterated from bignum-truncate-guess in l0-bignum64.lisp
932;;; this is not beautiful...
[8827]933(defx8632lapfunction truncate-guess-loop ((guess-h 16) (guess-l 12) (x 8)
934 (xidx 4) #|(ra 0)|#
935 (yptr arg_y) (yidx arg_z))
[11080]936 (save-stackargs-frame 4)
[9552]937 (push (% arg_y))
938 (push (% arg_z))
[7963]939
[9552]940 (movl (@ -4 (% ebp)) (% temp0)) ;guess-h
941 (movl (@ -8 (% ebp)) (% temp1)) ;guess-l
942 (compose-digit temp0 temp1 imm0)
943 (movd (% imm0) (% mm0)) ;save guess
944
[11080]945 @loop
946 (movl (@ (% esp)) (% yidx))
947 (movl (@ 4 (% esp)) (% yptr))
[9552]948 (movd (@ (- x8632::misc-data-offset 0) (% yptr) (% yidx)) (% mm1)) ;y1 (high)
949 ;; (%multiply guess y1)
950 (pmuludq (% mm0) (% mm1))
951 ;; (%multiply guess y2)
952 (movd (@ (- x8632::misc-data-offset 4) (% yptr) (% yidx)) (% mm2)) ;y2 (low)
953 (pmuludq (% mm0) (% mm2))
954
955 (movl (@ -12 (% ebp)) (% temp0)) ;x
956 (movl (@ -16 (% ebp)) (% arg_y)) ;xidx
957 (mark-as-imm temp1) ;edx now unboxed
958
959 ;; (%subtract-with-borrow x-i-1 low-guess*y1 1)
960 (movl (@ (- x8632::misc-data-offset 4) (% temp0) (% arg_y)) (% edx)) ;x-i-1
961 (movd (% mm1) (% eax)) ;low part of y1*guess
962 (subl (% eax) (% edx))
963 (movd (% edx) (% mm6)) ;save middle digit
964 ;; (%subtract-with-borrow x-i high-guess*y1 borrow)
965 (movl (@ (- x8632::misc-data-offset 0) (% temp0) (% arg_y)) (% edx)) ;x-i
966 (movq (% mm1) (% mm3))
967 (psrlq ($ 32) (% mm3)) ;get high part into low half
968 (movd (% mm3) (% eax)) ;high part of y1*guess
969 (sbbl (% eax) (% edx))
970 (movd (% edx) (% mm7)) ;save high digit
[11080]971 ;; see if guess is suitable
[9552]972 ;; if (and (= high-digit 0)
973 (test (% edx) (% edx))
974 (jne @return)
975 ;; (or (> high-guess*y2 middle-digit)
976 (movq (% mm2) (% mm3))
977 (psrlq ($ 32) (% mm3))
978 (movd (% mm3) (% eax)) ;high part of y2*guess
979 (movd (% mm6) (% edx)) ;middle-digit
980 (cmpl (% edx) (% eax))
[9797]981 (ja @decrement)
[9552]982 ;; (and (= middle-digit high-guess*y2)
[9624]983 (jne @return)
[9552]984 ;; (> low-guess*y2 x-i-2)
985 (movd (% mm2) (% eax)) ;low part of y2*guess
986 (movl (@ (- x8632::misc-data-offset 8) (% temp0) (% arg_y)) (% edx)) ;x-i-2
987 (cmpl (% edx) (% eax))
[9797]988 (ja @decrement)
[9552]989 @return
990 (mark-as-node edx)
991 (leave)
992 (movl (% esp) (% temp0))
993 (movd (% mm0) (% imm0))
994 (shrl ($ 16) (% imm0))
995 (shll ($ x8632::fixnumshift) (% imm0)) ;high half
996 (push (% imm0))
997 (movd (% mm0) (% imm0))
[9624]998 (andl ($ #xffff) (% imm0))
999 (shll ($ x8632::fixnumshift) (% imm0))
[9552]1000 (push (% imm0)) ;low half
1001 (set-nargs 2)
1002 (jmp-subprim .SPvalues)
1003 @decrement
1004 (movd (% mm0) (% imm0)) ;guess
[9624]1005 (subl ($ 1) (% imm0))
[9552]1006 (movd (% imm0) (% mm0))
[11080]1007 (jmp @loop))
[9552]1008
[9488]1009;;; If x[i] = y[j], return the all ones digit (as two halves).
1010;;; Otherwise, compute floor x[i]x[i-1] / y[j].
[8827]1011(defx8632lapfunction %floor-99 ((x-stk 8) (xidx 4) #|(ra 0)|#
1012 (yptr arg_y) (yidx arg_z))
[10923]1013 (pop (% temp1))
1014 (pop (% imm0))
[9488]1015 (pop (% temp0))
1016 (discard-reserved-frame)
[10923]1017 (push (% temp1))
1018 (movl (% imm0) (% temp1))
1019 (movl (@ x8632::misc-data-offset (% temp0) (% temp1)) (% imm0)) ;x[i]
[9552]1020 (cmpl (% imm0) (@ x8632::misc-data-offset (% yptr) (% yidx))) ;y[j]
[9488]1021 (jne @more)
1022 (pushl ($ '#xffff))
1023 (pushl ($ '#xffff))
1024 (lea (@ '2 (% esp)) (% temp0))
1025 (set-nargs 2)
1026 (jmp-subprim .SPvalues)
1027 @more
1028 (mark-as-imm edx) ;aka temp1 (contains a fixnum)
[10923]1029 (movl (@ (- x8632::misc-data-offset 4) (% temp0) (% temp1)) (% eax)) ;low
1030 (movl (@ x8632::misc-data-offset (% temp0) (% temp1)) (% edx)) ;high digit
[9623]1031 (divl (@ x8632::misc-data-offset (% yptr) (% yidx)))
[9488]1032 (mark-as-node edx)
1033 ;; extract and push high half of quotient
1034 (movl ($ (- #x10000)) (% arg_y))
1035 (andl (% eax) (% arg_y))
1036 (shrl ($ (- 16 x8632::fixnumshift)) (% arg_y))
1037 (push (% arg_y))
1038 ;; extract and push low half
[9623]1039 (andl ($ #xffff) (% eax))
[9488]1040 (shll ($ x8632::fixnumshift) (% eax))
1041 (push (% eax))
1042 (set-nargs 2)
1043 (lea (@ '2 (% esp)) (% temp0))
1044 (jmp-subprim .SPvalues))
[8827]1045
[9552]1046;;; x * y + carry
[8827]1047(defx8632lapfunction %multiply-and-add-1 ((x-high 16)
1048 (x-low 12)
1049 (y-high 8)
1050 (y-low 4)
1051 #|(ra 0)|#
1052 (carry-in-high arg_y)
1053 (carry-in-low arg_z))
[9552]1054 (movl (@ x-high (% esp)) (% temp0))
1055 (movl (@ x-low (% esp)) (% temp1))
1056 (compose-digit temp0 temp1 imm0)
1057 (movd (% imm0) (% mm0))
1058 (movl (@ y-high (% esp)) (% temp0))
1059 (movl (@ y-low (% esp)) (% temp1))
1060 (compose-digit temp0 temp1 imm0)
1061 (movd (% imm0) (% mm1))
1062 (pmuludq (% mm1) (% mm0)) ;x * y
1063 (compose-digit arg_y arg_z imm0)
1064 (movd (% imm0) (% mm1))
1065 (paddq (% mm1) (% mm0)) ;add in carry digit
1066 (movq (% mm0) (% mm1))
1067 (psrlq ($ 32) (% mm1)) ;resultant carry digit
1068 ;; clean up stack
1069 (pop (% temp0))
1070 (addl ($ '6) (% esp))
1071 (push (% temp0))
1072 ;; return (values carry-h carry-l result-h result-l)
1073 (movl (% esp) (% temp0))
1074 (movd (% mm1) (% imm0))
1075 (shrl ($ 16) (% imm0))
1076 (shll ($ x8632::fixnumshift) (% imm0)) ;carry-h
1077 (push (% imm0))
1078 (movd (% mm1) (% imm0))
1079 (shll ($ 16) (% imm0))
1080 (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;carry-l
1081 (push (% imm0))
1082 (movd (% mm0) (% imm0))
1083 (shrl ($ 16) (% imm0))
1084 (shll ($ x8632::fixnumshift) (% imm0)) ;result-h
1085 (push (% imm0))
1086 (movd (% mm0) (% imm0))
1087 (shll ($ 16) (% imm0))
1088 (shrl ($ (- 16 x8632::fixnumshift)) (% imm0)) ;result-l
1089 (push (% imm0))
1090 (set-nargs 4)
1091 (jmp-subprim .SPvalues))
[8827]1092
1093;;; Copy the limb SRC points to to where DEST points.
1094(defx8632lapfunction copy-limb ((src arg_y) (dest arg_z))
1095 (int ($ 3)))
1096
1097;;; Return T iff LIMB contains 0.
1098(defx8632lapfunction limb-zerop ((limb arg_z))
1099 (int ($ 3)))
1100
1101;;; Return -1,0,1 according to whether the contents of Y are
1102;;; <,=,> the contents of Z.
1103(defx8632lapfunction compare-limbs ((y arg_y) (z arg_z))
1104 (int ($ 3)))
1105
1106;;; Add a fixnum to the limb LIMB points to. Ignore overflow.
1107(defx8632lapfunction add-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1108 (int ($ 3)))
1109
1110;;; Store a fixnum value where LIMB points.
1111(defx8632lapfunction copy-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1112 (int ($ 3)))
1113
1114;;; Increment a "LIMB VECTOR" (bignum) by a small amount. The caller
1115;;; knows that carries will only propagate for a word or two.
1116(defx8632lapfunction mpn-incr-u ((limb arg_y) (fixby arg_z))
1117 (int ($ 3)))
1118
1119;;; Store XP-YP at WP; return carry (0 or 1).
1120;;; wp, xp, yp: word-aligned, unboxed ptrs (fixnums)
1121;;; size: boxed fixnum
1122;;; returns boxed carry
1123(defx8632lapfunction mpn-sub-n ((wp 8) (xp 4) #|(ra 0)|#
1124 (yp arg_y) (size arg_z))
1125 (int ($ 3)))
1126
1127;;; Store XP+YP at WP; return carry (0 or 1).
1128;;; wp, xp, yp = word-aligned, unboxed macptrs (fixnums).
1129;;; size = boxed fixnum
1130;;; result = boxed carry
1131(defx8632lapfunction mpn-add-n ((wp 8) (xp 4) #|(ra 0)|#
1132 (yp arg_y) (size arg_z))
1133 (int ($ 3)))
1134
1135;;; Add the single limb LIMB to S1P (propagating carry.) Store the
1136;;; result at RP. RP and S1P may be the same place, so check for
1137;;; that and do nothing after carry stops propagating. Return carry.
1138(defx8632lapfunction mpn-add-1 ((rp-offset 8) (s1p 4) #|(ra 0)|#
1139 (size arg_y) (limb arg_z))
1140 (int ($ 3)))
1141
1142;;; Multiply the limb vector S1 by the single limb at LIMBPTR, storing
1143;;; the result at RES. Store the "carry out" (high word of last 64-bit
1144;;; partial product) at the limb RESULT.
1145;;; res, s1, limbptr, result:
1146;;; unboxed, word-aligned ptrs (fixnums). size: boxed fixnum
1147;;; It'd be hard to transliterate the GMP code here; the GMP version
1148;;; uses lots more immediate registers than we can easily use in LAP
1149;;; (and is much more aggressively pipelined).
1150(defx8632lapfunction mpn-mul-1 ((res-offset 12)
1151 (s1-offset 8)
1152 (size 4)
1153 #|(ra 0)|#
1154 (limbptr arg_y)
1155 (result arg_z))
1156 (int ($ 3)))
1157
1158;;; multiply s1*limb and add result to res
1159;;; res, s1, limbptr, result:
1160;;; unboxed, word-aligned ptrs (fixnums).
1161;;; size: boxed fixnum
1162;;; limbptr: source "limb".
1163;;; result: carry out (high word of product).
1164(defx8632lapfunction mpn-addmul-1 ((res-offset 12)
1165 (s1-offset 8)
1166 (size 4)
1167 #|(ra 0)|#
1168 (limbptr arg_y)
1169 (result arg_z))
1170 (int ($ 3)))
1171
1172;;; Multiply the UN-word limb vector at UP and the VN-word limb vector
1173;;; at VP, store the result at RP.
1174(defx8632lapfunction mpn-mul-basecase ((rp-offset 12)
1175 (up-offset 8)
1176 (un 4)
1177 #|(ra 0)|#
1178 (vp arg_y)
1179 (vn arg_z))
1180 (int ($ 3)))
1181
1182;;; left-shift src by 1 bit, storing result at res. Return
1183;;; the bit that was shifted out.
1184(defx8632lapfunction mpn-lshift-1 ((resptr 4) #|(ra 0)|#
1185 (s1ptr arg_y) (size-arg arg_z))
1186 (int ($ 3)))
1187
1188;;; Do a 32x32=64 unsigned multiply of the words at X and Y. Store
1189;;; result (low word first) at RESULT.
1190(defx8632lapfunction umulppm ((x 4) #|(ra 0)|# (y arg_y) (result arg_z))
1191 (int ($ 3)))
1192
1193(defx8632lapfunction %fixnum-to-bignum-set ((bignum arg_y) (fixnum arg_z))
1194 (unbox-fixnum fixnum imm0)
1195 (movl (% imm0) (@ x8632::misc-data-offset (% bignum)))
1196 (single-value-return))
[9003]1197
1198(defx8632lapfunction bignum-negate-loop-really ((bignum 4) #|(ra 0)|#
1199 (len arg_y) (result arg_z))
1200 (mark-as-imm edx) ;aka %temp1
1201 (unbox-fixnum arg_y edx)
1202 (movl (@ bignum (% esp)) (% arg_y))
1203 (xorl (% temp0) (% temp0))
1204 (stc)
1205 @loop
1206 (movl (@ x8632::misc-data-offset (% arg_y) (% temp0)) (% imm0))
1207 (not (% imm0))
1208 (adc ($ 0) (% imm0))
1209 (movl (% imm0) (@ x8632::misc-data-offset (% arg_z) (% temp0)))
1210 (lea (@ x8632::node-size (% temp0)) (% temp0))
1211 (decl (% edx)) ;preserves carry flag
1212 (jg @loop)
1213 ;; return carry
1214 (setc (% imm0.b))
1215 (movzbl (% imm0.b) (% imm0))
1216 (box-fixnum imm0 arg_z)
1217 (mark-as-node edx)
1218 (single-value-return 3))
1219
1220(defx8632lapfunction %bignum-set ((bignum 8) (i 4) #|(ra 0)|#
1221 (high arg_y) (low arg_z))
1222 (compose-digit high low imm0)
1223 (movl (@ bignum (% esp)) (% arg_z))
1224 (movl (@ i (% esp)) (% arg_y))
1225 (movl (% imm0) (@ x8632::misc-data-offset (% arg_z) (% arg_y)))
1226 (single-value-return 4))
1227
Note: See TracBrowser for help on using the repository browser.