close Warning: Can't use blame annotator:
No changeset 1633 in the repository

source: release/1.4/source/level-0/PPC/PPC32/ppc32-bignum.lisp

Last change on this file was 13075, checked in by R. Matthew Emerson, 15 years ago

Merge trunk changes r13066 through r13067.
(copyright notices)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 47.3 KB
RevLine 
1;;-*- Mode: Lisp; Package: CCL -*-
2;;;
3;;; Copyright (C) 2009 Clozure Associates
4;;; Copyright (C) 1994-2001 Digitool, Inc
5;;; This file is part of Clozure CL.
6;;;
7;;; Clozure CL is licensed under the terms of the Lisp Lesser GNU Public
8;;; License , known as the LLGPL and distributed with Clozure CL as the
9;;; file "LICENSE". The LLGPL consists of a preamble and the LGPL,
10;;; which is distributed with Clozure CL as the file "LGPL". Where these
11;;; conflict, the preamble takes precedence.
12;;;
13;;; Clozure CL is referenced in the preamble as the "LIBRARY."
14;;;
15;;; The LLGPL is also available online at
16;;; http://opensource.franz.com/preamble.html
17
18
19(in-package "CCL")
20
21(eval-when (:compile-toplevel :execute)
22 (require "PPC32-ARCH")
23 (require "PPC-LAPMACROS")
24
25 ;; Set RES to 1 if (u< x y), to 0 otherwise.
26 (defppclapmacro sltu (res x y)
27 `(progn
28 (subfc ,res ,x ,y)
29 (subfe ,res ,res ,res)
30 (neg ,res ,res)))
31
32 (defppclapmacro 48x32-divide (x-hi16 x-lo y freg temp-freg freg2 immx)
33 `(let ((temp 16)
34 (temp.h 16)
35 (temp.l 20)
36 (zero 8)
37 (zero.h 8)
38 (zero.l 12))
39 (stwu tsp -24 tsp)
40 (stw tsp 4 tsp)
41 (lwi ,immx #x43300000) ; 1075 = 1022+53
42 (stw ,immx zero.h tsp)
43 (stw rzero zero.l tsp)
44 (lfd ,temp-freg zero tsp)
45 (rlwimi ,immx ,x-hi16 0 16 31)
46 (stw ,immx temp.h tsp)
47 (stw ,x-lo temp.l tsp)
48 (lfd ,freg temp tsp)
49
50 (fsub ,freg ,freg ,temp-freg)
51 (lwi ,immx #x43300000)
52 (stw ,immx temp.h tsp)
53 (stw ,y temp.l tsp)
54 (lfd ,freg2 temp tsp)
55 (lwz tsp 0 tsp)
56 (fsub ,freg2 ,freg2 ,temp-freg)
57 (fdiv ,freg ,freg ,freg2)
58 ))
59
60 )
61
62;;; %BIGNUM-REF needs to access bignums as obviously as possible, and it needs
63;;; to be able to return 32 bits somewhere no one looks for real objects.
64;;;
65;;; The easiest thing to do is to store the 32 raw bits in two fixnums
66;;; and return multiple values.
67(defppclapfunction %bignum-ref ((bignum arg_y) (i arg_z))
68 (vref32 imm0 bignum i imm1)
69 (digit-h temp0 imm0)
70 (digit-l temp1 imm0)
71 (vpush temp0)
72 (vpush temp1)
73 (la temp0 8 vsp) ; ?? why not (mr temp0 vsp) before vpushing?
74 (set-nargs 2) ; that doesn't make any difference. And, in this case,
75 ; we can get away without setting nargs (since the caller
76 ; called us with 2 args, but that's horrible style.)
77 (ba .SPvalues))
78
79
80;;; Set the 0th element of DEST (a bignum or some other 32-bit ivector)
81;;; to the Ith element of the bignum SRC.
82(defppclapfunction %ref-digit ((bignum arg_x) (i arg_y) (dest arg_z))
83 (la imm1 ppc32::misc-data-offset i)
84 (lwzx imm0 bignum imm1)
85 (stw imm0 ppc32::misc-data-offset dest)
86 (blr))
87
88;;; BIGNUM[I] := DIGIT[0]
89(defppclapfunction %set-digit ((bignum arg_x) (i arg_y) (digit arg_z))
90 (la imm1 ppc32::misc-data-offset i)
91 (lwz imm0 ppc32::misc-data-offset digit)
92 (stwx imm0 bignum imm1)
93 (blr))
94
95;;; Return 0 if the 0th digit in X is 0.
96(defppclapfunction %digit-zerop ((x arg_z))
97 (lwz imm0 ppc32::misc-data-offset x)
98 (cntlzw imm0 imm0)
99 (srwi imm0 imm0 5)
100 (rlwimi imm0 imm0 4 27 27)
101 (addi arg_z imm0 (target-nil-value))
102 (blr))
103
104;;; store the sign of bignum (0 or -1) in the one-word bignum "digit".
105(defppclapfunction %bignum-sign-digit ((bignum arg_y) (digit arg_z))
106 (vector-length imm0 bignum imm0)
107 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
108 (lwzx imm0 bignum imm0)
109 (srawi imm0 imm0 31) ;propagate sign bit
110 (stw imm0 ppc32::misc-data-offset digit)
111 (blr))
112
113;;; Return the sign of bignum (0 or -1) as a fixnum
114(defppclapfunction %bignum-sign ((bignum arg_z))
115 (vector-length imm0 bignum imm0)
116 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
117 (lwzx imm0 bignum imm0)
118 (srawi imm0 imm0 31) ;propagate sign bit
119 (box-fixnum arg_z imm0)
120 (blr))
121
122;;; Count the sign bits in the most significant digit of bignum;
123;;; return fixnum count.
124(defppclapfunction %bignum-sign-bits ((bignum arg_z))
125 (vector-length imm0 bignum imm0)
126 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
127 (lwzx imm0 bignum imm0)
128 (cmpwi imm0 0)
129 (not imm0 imm0)
130 (blt @wasneg)
131 (not imm0 imm0)
132 @wasneg
133 (cntlzw imm0 imm0)
134 (box-fixnum arg_z imm0)
135 (blr))
136
137(defppclapfunction %digit-0-or-plusp ((bignum arg_y) (idx arg_z))
138 (la imm0 ppc32::misc-data-offset idx)
139 (lwzx imm0 bignum imm0)
140 (xoris imm0 imm0 #x8000) ; invert sign bit
141 (srwi imm0 imm0 31)
142 (bit0->boolean arg_z imm0 imm0) ; return T if sign bit was clear before inversion
143 (blr))
144
145;;; For oddp, evenp
146(defppclapfunction %bignum-oddp ((bignum arg_z))
147 (lwz imm0 ppc32::misc-data-offset bignum)
148 (clrlwi imm0 imm0 31)
149 (bit0->boolean arg_z imm0 imm0)
150 (blr))
151
152(defppclapfunction bignum-plusp ((bignum arg_z))
153 (vector-length imm0 bignum imm0)
154 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
155 (lwzx imm0 bignum imm0)
156 (xoris imm0 imm0 #x8000) ; invert sign bit
157 (srwi imm0 imm0 31)
158 (bit0->boolean arg_z imm0 imm0) ; return T if sign bit was clear before inversion
159 (blr))
160
161(defppclapfunction %fixnum-to-bignum-set ((bignum arg_y) (fixnum arg_z))
162 (unbox-fixnum imm0 fixnum)
163 (stw imm0 ppc32::misc-data-offset bignum)
164 (blr))
165
166(defppclapfunction bignum-minusp ((bignum arg_z))
167 (vector-length imm0 bignum imm0)
168 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
169 (lwzx imm0 bignum imm0)
170 (srwi imm0 imm0 31)
171 (rlwimi imm0 imm0 4 27 27)
172 (addi arg_z imm0 (target-nil-value)) ; return T if sign bit was clear before inversion
173 (blr))
174
175
176;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum).
177;;; Store the result in R[K], and return the outgoing carry.
178;;; If I is NIL, A is a fixnum. If J is NIL, B is a fixnum.
179
180(defppclapfunction %add-with-carry ((r 12) (k 8) (c 4) (a 0) (i arg_x) (b arg_y) (j arg_z))
181 (cmpwi cr1 j (target-nil-value))
182 (cmpwi cr0 i (target-nil-value))
183 (lwz temp0 a vsp)
184 (unbox-fixnum imm1 temp0)
185 (unbox-fixnum imm2 b)
186 (beq cr0 @got-a)
187 (la imm1 ppc32::misc-data-offset i)
188 (lwzx imm1 temp0 imm1)
189 @got-a
190 (beq cr1 @got-b)
191 (la imm2 ppc32::misc-data-offset j)
192 (lwzx imm2 b imm2)
193 @got-b
194 (lwz temp0 c vsp)
195 (unbox-fixnum imm0 temp0)
196 (addic imm0 imm0 -1)
197 (lwz temp1 r vsp)
198 (lwz temp0 k vsp)
199 (la vsp 16 vsp)
200 (adde imm0 imm1 imm2)
201 (la imm2 ppc32::misc-data-offset temp0)
202 (stwx imm0 temp1 imm2)
203 (addze imm0 rzero)
204 (box-fixnum arg_z imm0)
205 (blr))
206
207
208
209
210
211;;; Store the result of A[I] - B[J] - borrow into R[K], returning the borrow.
212;;; If I is NIL, A is a fixnum; likewise for J and B.
213(defppclapfunction %subtract-with-borrow ((r 12) (k 8) (borrow 4) (a 0) (i arg_x) (b arg_y) (j arg_z))
214 (cmpwi cr0 i (target-nil-value))
215 (cmpwi cr1 j (target-nil-value))
216 (lwz temp0 a vsp)
217 (unbox-fixnum imm2 b)
218 (unbox-fixnum imm1 temp0)
219 (beq cr1 @got-b)
220 (la imm2 ppc32::misc-data-offset j)
221 (lwzx imm2 b imm2)
222 @got-b
223 (beq cr0 @got-a)
224 (la imm1 ppc32::misc-data-offset i)
225 (lwzx imm1 temp0 imm1)
226 @got-a
227 (lwz temp0 borrow vsp)
228 (unbox-fixnum imm0 temp0)
229 (addic imm0 imm0 -1)
230 (lwz temp0 r vsp)
231 (lwz temp1 k vsp)
232 (la vsp 16 vsp)
233 (subfe imm0 imm2 imm1)
234 (la imm1 ppc32::misc-data-offset temp1)
235 (stwx imm0 temp0 imm1)
236 (addze imm0 rzero)
237 (box-fixnum arg_z imm0)
238 (blr))
239
240;; multiply i'th digit of x by y and add to result starting at digit i
241(defppclapfunction %multiply-and-add-harder-loop-2
242 ((x-ptr 4) (y-ptr 0) (resptr arg_x)(residx arg_y) (count arg_z))
243 (let ((tem imm0)
244 (y imm1)
245 (prod-h imm2)
246 (prod-l imm3)
247 (x imm4)
248 (xptr temp2)
249 (yidx temp1)
250 (yptr temp0))
251 (lwz xptr x-ptr vsp)
252 (la tem ppc32::misc-data-offset residx)
253 (lwzx x xptr tem)
254 (lwz yptr y-ptr vsp)
255 (li yidx 0) ; init yidx 0
256 (addc prod-h rzero rzero) ; init carry 0, mumble 0
257 @loop
258 (subi count count '1)
259 (cmpwi count 0)
260 (la tem ppc32::misc-data-offset yidx) ; get yidx
261 (lwzx y yptr tem)
262 (mullw prod-l x y)
263 (addc prod-l prod-l prod-h)
264 (mulhwu prod-h x y)
265 (addze prod-h prod-h)
266 (la tem ppc32::misc-data-offset residx)
267 (lwzx y resptr tem)
268 (addc prod-l prod-l y)
269 (addze prod-h prod-h)
270 (stwx prod-l resptr tem)
271 (addi residx residx '1)
272 (addi yidx yidx '1)
273 (bgt @loop)
274 (la tem ppc32::misc-data-offset residx)
275 (stwx prod-h resptr tem)
276 (la vsp 8 vsp)
277 (blr)))
278
279
280
281;;; Multiply X[I] by the unboxed value of the (non-negative) fixnum Y;
282;;; add the incoming carry from CARRY[0] to the 64-bit product. Store
283;;; the low word of the 64-bit sum in R[0] and the high word in
284;;; CARRY[0].
285
286(defppclapfunction %multiply-and-add ((r 4) (carry 0) (x arg_y) (i arg_x) (y arg_z))
287 (unbox-fixnum imm0 arg_z)
288 (la imm1 ppc32::misc-data-offset i)
289 (lwzx imm1 x imm1)
290 (mulhwu imm2 imm0 imm1)
291 (mullw imm1 imm0 imm1)
292 (lwz temp0 carry vsp)
293 (lwz imm0 ppc32::misc-data-offset temp0)
294 (addc imm1 imm1 imm0)
295 (addze imm2 imm2)
296 (stw imm2 ppc32::misc-data-offset temp0)
297 (lwz arg_z r vsp)
298 (la vsp 8 vsp)
299 (stw imm1 ppc32::misc-data-offset arg_z)
300 (blr))
301
302(defppclapfunction %floor ((q 4) (r 0) (num-high arg_x) (num-low arg_y) (denom-arg arg_z))
303 (let ((rem imm0)
304 (rem-low imm1)
305 (quo imm2)
306 (temp imm3)
307 (denom imm4))
308 (lwz denom ppc32::misc-data-offset denom)
309 (lwz rem ppc32::misc-data-offset num-high)
310 (lwz rem-low ppc32::misc-data-offset num-low)
311 (mr temp denom)
312 (sltu quo rem denom)
313 (subi temp temp quo)
314 (and temp temp denom)
315 (sub rem temp rem)
316 (li temp0 '32)
317 @loop
318 (subi temp0 temp0 '1)
319 (cmpwi temp0 0)
320 (slwi rem rem 1)
321 (srwi temp rem-low 31)
322 (or rem rem temp)
323 (slwi rem-low rem-low 1)
324 (sltu rem rem denom)
325 (slwi quo quo 1)
326 (or quo quo temp)
327 (subi temp temp 1)
328 (and temp temp denom)
329 (sub rem rem temp)
330 (bne @loop)
331 (not quo quo)
332 (lwz temp0 q vsp)
333 (stw quo ppc32::misc-data-offset temp0)
334 (lwz arg_z r vsp)
335 (la vsp 8 vsp)
336 (stw rem ppc32::misc-data-offset arg_z)
337 (blr)))
338
339(defppclapfunction %bignum-ref-hi ((bignum arg_y) (i arg_z))
340 (la imm1 ppc32::misc-data-offset i)
341 (lhzx imm0 bignum imm1)
342 (box-fixnum arg_z imm0)
343 (blr))
344
345
346(defppclapfunction %bignum-set ((bignum 0) (i arg_x) (high arg_y) (low arg_z))
347 (compose-digit imm0 high low)
348 (lwz arg_z bignum vsp)
349 (vset32 imm0 arg_z i imm1)
350 (la vsp 4 vsp)
351 (blr))
352
353
354
355
356; this is silly
357(defppclapfunction %add-the-carry ((b-h arg_x) (b-l arg_y) (carry-in arg_z))
358 (let ((a imm0)
359 (b imm1)
360 (temp imm2)
361 (c imm3))
362 (compose-digit b b-h b-l)
363 (unbox-fixnum c carry-in)
364 (add b c b)
365 (digit-h temp0 b)
366 (digit-l temp1 b)
367 (vpush temp0)
368 (vpush temp1)
369 (la temp0 8 vsp)
370 (set-nargs 2)
371 (ba .SPvalues)))
372
373
374
375
376;;; %SUBTRACT-WITH-BORROW -- Internal.
377;;;
378;;; This should be in assembler, and should not cons intermediate results. It
379;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
380;;; subtracting a possible incoming borrow.
381;;;
382;;; We really do: a - b - 1 + borrow, where borrow is either 0 or 1.
383;;;
384
385(defppclapfunction %subtract-with-borrow-1 ((a-h 4) (a-l 0) (b-h arg_x) (b-l
386arg_y) (borrow-in arg_z))
387 (let ((a imm0)
388 (b imm1)
389 (temp imm2)
390 (c imm3))
391 (lwz temp0 a-h vsp)
392 (lwz temp1 a-l vsp)
393 (compose-digit a temp0 temp1)
394 (compose-digit b b-h b-l)
395 (unbox-fixnum c borrow-in)
396 (li temp -1)
397 (addc temp c temp)
398 (subfe a b a)
399 (addze c rzero)
400 (box-fixnum c c)
401 (digit-h temp0 a)
402 (digit-l temp1 a)
403 (vpush temp0)
404 (vpush temp1)
405 (vpush c)
406 (la temp0 20 vsp)
407 (set-nargs 3)
408 (ba .SPvalues)))
409
410
411
412(defppclapfunction %subtract-one ((a-h arg_y)(a-l arg_z))
413 (let ((a imm0))
414 (compose-digit a a-h a-l)
415 (subi a a 1)
416 (digit-h temp0 a)
417 (vpush temp0)
418 (digit-l temp0 a)
419 (vpush temp0)
420 (la temp0 8 vsp)
421 (set-nargs 2)
422 (ba .spvalues)))
423
424
425
426
427;;; %MULTIPLY-AND-ADD -- Internal.
428;;;
429;;; This multiplies x-digit and y-digit, producing high and low digits
430;;; manifesting the result. Then it adds the low digit, res-digit, and
431;;; carry-in-digit. Any carries (note, you still have to add two digits at a
432;;; time possibly producing two carries) from adding these three digits get
433;;; added to the high digit from the multiply, producing the next carry digit.
434;;; Res-digit is optional since two uses of this primitive multiplies a single
435;;; digit bignum by a multiple digit bignum, and in this situation there is no
436;;; need for a result buffer accumulating partial results which is where the
437;;; res-digit comes from.
438;;; [slh] I assume that the returned carry "digit" can only be 0, 1 or 2
439
440
441(defppclapfunction %multiply-and-add-1 ((x-high 8)
442 (x-low 4)
443 (y-high 0)
444 (y-low arg_x)
445 (carry-in-high arg_y)
446 (carry-in-low arg_z))
447 (let ((x imm0)
448 (y imm1)
449 (carry-in imm2)
450 (lo imm3)
451 (hi imm4))
452 (compose-digit carry-in carry-in-high carry-in-low)
453 (vpop temp0)
454 (compose-digit y temp0 y-low)
455 (vpop temp0)
456 (vpop temp1)
457 (compose-digit x temp1 temp0)
458 (mullw lo x y)
459 (mulhwu hi x y)
460 (addc lo lo carry-in)
461 (addze hi hi)
462 (digit-h temp0 hi)
463 (digit-l temp1 hi)
464 (digit-h temp2 lo)
465 (digit-l temp3 lo)
466 (vpush temp0)
467 (vpush temp1)
468 (vpush temp2)
469 (vpush temp3)
470 (set-nargs 4)
471 (la temp0 16 vsp)
472 (ba .SPvalues)))
473
474
475(defppclapfunction %logcount-complement ((bignum arg_y) (idx arg_z))
476 (let ((arg imm0)
477 (shift imm1)
478 (temp imm2))
479 (la arg ppc32::misc-data-offset idx)
480 (lwzx arg bignum arg)
481 (not. shift arg)
482 (li arg_z 0)
483 (if ne
484 (progn
485 @loop
486 (la temp -1 shift)
487 (and. shift shift temp)
488 (la arg_z '1 arg_z)
489 (bne @loop)))
490 (blr)))
491
492(defppclapfunction %logcount ((bignum arg_y) (idx arg_z))
493 (let ((arg imm0)
494 (shift imm1)
495 (temp imm2))
496 (la arg ppc32::misc-data-offset idx)
497 (lwzx arg bignum arg)
498 (mr. shift arg)
499 (li arg_z 0)
500 (if ne
501 (progn
502 @loop
503 (la temp -1 shift)
504 (and. shift shift temp)
505 (la arg_z '1 arg_z)
506 (bne @loop)))
507 (blr)))
508
509; return res
510(defppclapfunction bignum-add-loop-2 ((aptr arg_x)(bptr arg_y) (result arg_z))
511 (let ((idx imm0)
512 (count imm1)
513 (x imm2)
514 (y imm3)
515 (len-a temp0)
516 (len-b temp1)
517 (tem temp2))
518 (li idx ppc32::misc-data-offset)
519 (lwz imm4 ppc32::misc-header-offset aptr)
520 (header-length len-a imm4)
521 (lwz imm4 ppc32::misc-header-offset bptr)
522 (header-length len-b imm4)
523 ; make a be shorter one
524 (cmpw len-a len-b)
525 (li count 0)
526 ; initialize carry 0
527 (addc x rzero rzero)
528 (ble @loop)
529 ; b shorter - swap em
530 (mr tem len-a)
531 (mr len-a len-b)
532 (mr len-b tem)
533 (mr tem aptr)
534 (mr aptr bptr)
535 (mr bptr tem)
536 @loop
537 (lwzx y aptr idx)
538 (lwzx x bptr idx)
539 (addi count count '1)
540 (cmpw count len-a)
541 (adde x x y)
542 (stwx x result idx)
543 (addi idx idx '1)
544 (blt @loop)
545 ; now propagate carry thru longer (b) using sign of shorter
546 ;(SUBI imm4 idx '1) ; y has hi order word of a
547 ;(lwzx y aptr imm4)
548 (cmpw len-a len-b)
549 (adde imm4 rzero rzero) ; get carry
550 (srawi y y 31) ; p.o.s clobbers carry
551 (addic imm4 imm4 -1) ; restore carry
552 (beq @l3) ; unless equal
553 @loop2
554 (lwzx x bptr idx)
555 (adde x x y)
556 (stwx x result idx)
557 (addi count count '1)
558 (cmpw count len-b)
559 (addi idx idx '1)
560 (blt @loop2)
561 ; y has sign of shorter - get sign of longer to x
562 @l3
563 (subi imm4 idx '1)
564 (lwzx x bptr imm4)
565 (adde imm4 rzero rzero) ; get carry
566 (srawi x x 31) ; clobbers carry
567 (addic imm4 imm4 -1)
568 (adde x x y)
569 (stwx x result idx)
570 (blr)))
571
572;; same as above but with initial a index and finishes
573(defppclapfunction bignum-add-loop-+ ((init-a 0)(aptr arg_x)(bptr arg_y)(length arg_z))
574 (let ((idx imm0)
575 (count imm1)
576 (x imm2)
577 (y imm3)
578 (aidx imm4))
579 (li idx ppc32::misc-data-offset)
580 (lwz aidx init-a vsp)
581 (addi aidx aidx ppc32::misc-data-offset)
582 (li count 0)
583 ; initialize carry 0
584 (addc x rzero rzero)
585 @loop
586 (lwzx x aptr aidx)
587 (lwzx y bptr idx)
588 (adde x x y)
589 (stwx x aptr aidx)
590 (addi count count '1)
591 (cmpw count length)
592 (addi idx idx '1)
593 (addi aidx aidx '1)
594 (blt @loop)
595 (lwzx x aptr aidx) ; add carry into next one
596 (adde x x rzero)
597 (stwx x aptr aidx)
598 (la vsp 4 vsp)
599 (blr)))
600
601
602
603(defppclapfunction bignum-negate-loop-really ((big arg_x) (len arg_y) (result arg_z))
604 (let ((idx imm0)
605 (one imm1)
606 (x imm2))
607 (li idx ppc32::misc-data-offset)
608 (li one '1)
609 ; initialize carry 1
610 (li x -1)
611 (addic x x 1)
612 @loop
613 ;(addi count count '1)
614 ;(cmpw count len)
615 (subf. len one len)
616 (lwzx x big idx)
617 (not x x)
618 (adde x x rzero)
619 (stwx x result idx)
620 (addi idx idx '1)
621 (bgt @loop)
622 ; return carry
623 (li x 0)
624 (adde x x rzero)
625 (box-fixnum arg_z x)
626 (blr)))
627
628(defppclapfunction bignum-negate-to-pointer ((big arg_x) (len arg_y) (result arg_z))
629 (let ((idx imm0)
630 (one imm1)
631 (x imm2)
632 (oidx imm3)
633 (ptr imm4))
634 (li idx ppc32::misc-data-offset)
635 (li oidx 0)
636 (macptr-ptr ptr result)
637 (li one '1)
638 ; initialize carry 1
639 (li x -1)
640 (addic x x 1)
641 @loop
642 ;(addi count count '1)
643 ;(cmpw count len)
644 (subf. len one len)
645 (lwzx x big idx)
646 (not x x)
647 (adde x x rzero)
648 (stwx x ptr oidx)
649 (addi idx idx '1)
650 (addi oidx oidx 4)
651 (bgt @loop)
652 ; return carry
653 (li x 0)
654 (adde x x rzero)
655 (box-fixnum arg_z x)
656 (blr)))
657
658;; she do tolerate len = jidx
659(defppclapfunction bignum-shift-left-loop ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (jidx arg_z))
660 (let ((y imm0)
661 (idx imm1)
662 (bits imm2)
663 (rbits imm3)
664 (x imm4)
665 (iidx temp0)
666 (resptr temp1))
667 (li iidx 0)
668 (lwz bits nbits vsp)
669 (lwz resptr result vsp)
670 (unbox-fixnum bits bits)
671 (subfic rbits bits 32)
672 ;(dbg)
673 (lwz imm4 ppc32::misc-data-offset bignum)
674 (slw imm4 imm4 bits)
675 (la y (+ ppc32::misc-data-offset -4) jidx)
676 (stwx imm4 y resptr)
677
678 (cmpw len jidx)
679 (beq @done)
680 @loop
681 (addi idx iidx ppc32::misc-data-offset)
682 (lwzx x bignum idx)
683 (srw x x rbits)
684 (addi idx idx '1)
685 (lwzx y bignum idx)
686 (slw y y bits)
687 (or x x y)
688 (addi idx jidx ppc32::misc-data-offset)
689 (stwx x resptr idx)
690 (addi jidx jidx '1)
691 (cmpw jidx len)
692 (addi iidx iidx '1)
693 (blt @loop)
694 @done
695 ; do first - lo order
696
697 ; do last - hi order
698 (addi idx iidx ppc32::misc-data-offset)
699 ;(dbg t)
700 (lwzx y bignum idx)
701 (sraw y y rbits)
702 (addi idx len ppc32::misc-data-offset)
703 (stwx y resptr idx)
704 (la vsp 8 vsp)
705 (blr)))
706
707
708
709(defppclapfunction bignum-shift-right-loop-1 ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (iidx arg_z))
710 (let ((y imm0)
711 (idx imm1)
712 (bits imm2)
713 (rbits imm3)
714 (x imm4)
715 (jidx temp0)
716 (resptr temp1))
717 (li jidx 0)
718 (lwz bits nbits vsp)
719 (lwz resptr result vsp)
720 (unbox-fixnum bits bits)
721 (cmpw jidx len)
722 (subfic rbits bits 32)
723 (bge @done)
724 @loop
725 (addi idx iidx ppc32::misc-data-offset)
726 (lwzx x bignum idx)
727 (srw x x bits)
728 (addi idx idx '1)
729 (lwzx y bignum idx)
730 (slw y y rbits)
731 (or x x y)
732 (addi idx jidx ppc32::misc-data-offset)
733 (stwx x resptr idx)
734 (addi jidx jidx '1)
735 (cmpw jidx len)
736 (addi iidx iidx '1)
737 (blt @loop)
738 @done
739 (addi idx iidx ppc32::misc-data-offset)
740 (lwzx x bignum idx)
741 (sraw x x bits)
742 (addi idx jidx ppc32::misc-data-offset)
743 (stwx x resptr idx)
744 (la vsp 8 vsp)
745 (blr)))
746
747
748(defppclapfunction %compare-digits ((a arg_x) (b arg_y) (idx arg_z))
749 (la imm0 ppc32::misc-data-offset idx)
750 (lwzx imm1 a imm0)
751 (lwzx imm0 b imm0)
752 (cmplw imm1 imm0)
753 (li arg_z '0)
754 (beqlr)
755 (li arg_z '1)
756 (bgtlr)
757 (li arg_z '-1)
758 (blr))
759
760
761
762;; returns number of bits in digit-hi,digit-lo that are sign bits
763;; 32 - digits-sign-bits is integer-length
764
765(defppclapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
766 (rlwinm. imm1 hi (- 16 ppc32::fixnumshift) 0 15)
767 (rlwimi imm1 lo (- 32 ppc32::fixnumshift) 16 31)
768 (not imm1 imm1)
769 (blt @wasneg)
770 (not imm1 imm1)
771 @wasneg
772 (cntlzw imm1 imm1)
773 (box-fixnum arg_z imm1)
774 (blr))
775
776(defppclapfunction bignum-logtest-loop ((count arg_x) (s1 arg_y) (s2 arg_z))
777 (addi imm1 rzero ppc32::misc-data-offset)
778 @loop
779 (lwzx imm2 s1 imm1)
780 (lwzx imm3 s2 imm1)
781 (and. imm2 imm3 imm2)
782 (addi imm1 imm1 4)
783 (bne @true)
784 (subic. count count 4)
785 (bgt @loop)
786 (li arg_z (target-nil-value))
787 (blr)
788 @true
789 (li arg_z (+ (target-nil-value) ppc32::t-offset))
790 (blr))
791
792;;; dest[idx] <- (lognot src[idx])
793(defppclapfunction %bignum-lognot ((idx arg_x) (src arg_y) (dest arg_z))
794 (la imm1 ppc32::misc-data-offset idx)
795 (lwzx imm0 src imm1)
796 (not imm0 imm0)
797 (stwx imm0 dest imm1)
798 (blr))
799
800;;; dest[idx] <- (logand x[idx] y[idx])
801(defppclapfunction %bignum-logand ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
802 (vpop temp0)
803 (la imm1 ppc32::misc-data-offset temp0)
804 (lwzx imm0 x imm1)
805 (lwzx imm2 y imm1)
806 (and imm0 imm0 imm2)
807 (stwx imm0 dest imm1)
808 (blr))
809
810;;; dest[idx] <- (logandc2 x[idx] y[idx])
811(defppclapfunction %bignum-logandc2 ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
812 (vpop temp0)
813 (la imm1 ppc32::misc-data-offset temp0)
814 (lwzx imm0 x imm1)
815 (lwzx imm2 y imm1)
816 (andc imm0 imm0 imm2)
817 (stwx imm0 dest imm1)
818 (blr))
819
820;;; dest[idx] <- (logandc1 x[idx] y[idx])
821(defppclapfunction %bignum-logandc1 ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
822 (vpop temp0)
823 (la imm1 ppc32::misc-data-offset temp0)
824 (lwzx imm0 x imm1)
825 (lwzx imm2 y imm1)
826 (andc imm0 imm2 imm0)
827 (stwx imm0 dest imm1)
828 (blr))
829
830
831
832(defppclapfunction digit-lognot-move ((index arg_x) (source arg_y) (dest arg_z))
833 (let ((scaled-index imm1))
834 (vref32 imm0 source index scaled-index) ; imm1 has c(index) + data-offset
835 (not imm0 imm0)
836 (stwx imm0 dest scaled-index)
837 (blr)))
838
839; if dest not nil store unboxed result in dest(0), else return boxed result
840(defppclapfunction fix-digit-logandc2 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
841 (let ((w1 imm0)
842 (w2 imm1))
843 (unbox-fixnum w1 fix)
844 (lwz w2 ppc32::misc-data-offset big)
845 (cmpwi dest (target-nil-value))
846 (not w2 w2)
847 (and w1 w1 w2)
848 (bne @store)
849 (box-fixnum arg_z w1)
850 (blr)
851 @store
852 (stw w1 ppc32::misc-data-offset dest)
853 (blr)))
854
855
856
857(defppclapfunction fix-digit-logand ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
858 (let ((w1 imm0)
859 (w2 imm1))
860 (unbox-fixnum w1 fix)
861 (lwz w2 ppc32::misc-data-offset big)
862 (cmpwi dest (target-nil-value))
863 (and w1 w1 w2)
864 (bne @store)
865 (box-fixnum arg_z w1)
866 (blr)
867 @store
868 (stw w1 ppc32::misc-data-offset dest)
869 (blr)))
870
871
872
873(defppclapfunction fix-digit-logandc1 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
874 (let ((w1 imm0)
875 (w2 imm1))
876 (unbox-fixnum w1 fix)
877 (lwz w2 ppc32::misc-data-offset big)
878 (cmpwi dest (target-nil-value))
879 (not w1 w1)
880 (and w1 w1 w2)
881 (bne @store)
882 (box-fixnum arg_z w1)
883 (blr)
884 @store
885 (stw w1 ppc32::misc-data-offset dest)
886 (blr)))
887
888;;; dest[idx] <- (logior x[idx] y[idx])
889(defppclapfunction %bignum-logior ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
890 (vpop temp0)
891 (la imm1 ppc32::misc-data-offset temp0)
892 (lwzx imm0 x imm1)
893 (lwzx imm2 y imm1)
894 (or imm0 imm0 imm2)
895 (stwx imm0 dest imm1)
896 (blr))
897
898;;; dest[idx] <- (logxor x[idx] y[idx])
899(defppclapfunction %bignum-logxor ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
900 (vpop temp0)
901 (la imm1 ppc32::misc-data-offset temp0)
902 (lwzx imm0 x imm1)
903 (lwzx imm2 y imm1)
904 (xor imm0 imm0 imm2)
905 (stwx imm0 dest imm1)
906 (blr))
907
908
909
910(defppclapfunction bignum-xor-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
911 (lwz imm0 count vsp)
912 (addi imm1 rzero ppc32::misc-data-offset)
913 @loop
914 (lwzx imm2 s1 imm1)
915 (lwzx imm3 s2 imm1)
916 (xor imm2 imm3 imm2)
917 (subic. imm0 imm0 4)
918 (stwx imm2 dest imm1)
919 (addi imm1 imm1 4)
920 (bgt @loop)
921 @out
922 (la vsp 4 vsp)
923 (blr))
924
925#+nomore
926(defppclapfunction try-guess-loop-1 ((guess-h 8)(guess-l 4)(len-y 0)
927 (xidx arg_x) (xptr arg_y) (yptr arg_z))
928 (let ((guess imm0)
929 (carry imm1)
930 (y imm2)
931 (x imm2)
932 (prod-l imm3)
933 (prod-h imm4)
934 (tem imm4)
935 (yidx temp0)
936 (end-y temp1)
937 (carry-bit temp2))
938 (lwz x guess-h vsp)
939 (lwz tem guess-l vsp)
940 (compose-digit guess x tem)
941 (lwz end-y len-y vsp)
942 (li yidx 0)
943 (li carry 0)
944 (li carry-bit '1)
945 @loop
946 ; multiply guess by ydigit, add carry to lo, hi is new carry
947 ; then get an xdigit subtract prod-lo from it and store result in x (remember carry)
948 (addi tem yidx ppc32::misc-data-offset) ; get yidx
949 (lwzx y yptr tem)
950 (mullw prod-l guess y)
951 (mulhwu prod-h guess y)
952 (addc prod-l prod-l carry)
953 (adde carry prod-h rzero)
954 ; get back saved carry
955 (li tem '-1)
956 (addc tem carry-bit tem)
957 (addi tem xidx ppc32::misc-data-offset)
958 (lwzx x xptr tem)
959 (subfe x prod-l x)
960 (stwx x xptr tem)
961 ; save carry
962 (adde prod-l rzero rzero)
963 (box-fixnum carry-bit prod-l)
964 (addi yidx yidx '1)
965 (cmpw yidx end-y)
966 (addi xidx xidx '1)
967 (blt @loop)
968 ; finally subtract carry from last x digit
969 @done
970 (li prod-l '-1) ; get back saved carry again - box clobbered it?
971 (addc prod-l carry-bit prod-l)
972 (addi tem xidx ppc32::misc-data-offset) ; maybe still there - nope
973 (lwzx x xptr tem)
974 (subfe x carry x)
975 (stwx x xptr tem)
976 (la vsp 12 vsp)
977 (blr)))
978
979;; x0 is at index, x1 at index-1, x2 at index-2
980;; y1 is at index, y2 at index-1
981;; this doesnt help much
982(defppclapfunction truncate-guess-loop ((guess-h 8)(guess-l 4)(x 0)
983 (xidx arg_x)(yptr arg_y) (yidx arg_z))
984 (let ((guess imm0)
985 (y1 imm1)
986 (y2 imm1)
987 (gy1-lo imm2) ; look out below
988 (gy1-hi imm2)
989 (gy2-lo imm2)
990 (gy2-hi imm2)
991 (xptr temp0)
992 (m imm3)
993 (tem imm4)
994 (y1-idx 28)
995 (y2-idx 24)
996 (x0-idx 20)
997 (x1-idx 16)
998 (x2-idx 12))
999 (stwu tsp -32 tsp)
1000 (stw tsp 4 tsp)
1001 (lwz y1 guess-h vsp)
1002 (lwz tem guess-l vsp)
1003 (compose-digit guess y1 tem)
1004 (addi tem yidx ppc32::misc-data-offset)
1005 (lwzx y1 yptr tem)
1006 (stw y1 y1-idx tsp)
1007 (subi tem tem 4)
1008 (lwzx y2 yptr tem)
1009 (stw y2 y2-idx tsp)
1010 (lwz xptr x vsp)
1011 (addi tem xidx ppc32::misc-data-offset)
1012 (lwzx y1 xptr tem) ; its x0
1013 (stw y1 x0-idx tsp)
1014 (subi tem tem 4)
1015 (lwzx y1 xptr tem)
1016 (stw y1 x1-idx tsp)
1017 (subi tem tem 4)
1018 (lwzx y1 xptr tem)
1019 (stw y1 x2-idx tsp)
1020 @loop
1021 (lwz y1 y1-idx tsp) ; get y1
1022 (mullw gy1-lo guess y1)
1023 (lwz m x1-idx tsp) ; get x1
1024 (subc m m gy1-lo) ; x1 - gy1-lo => m
1025 (mulhwu gy1-hi guess y1)
1026 (lwz tem x0-idx tsp) ; get x0
1027 (subfe. tem gy1-hi tem) ; - val not used just cr
1028 (lwz y2 y2-idx tsp) ; get y2
1029 (mulhwu gy2-hi guess y2) ; does it pay to do this now even tho may not need?
1030 (bne @done)
1031 (cmpl :cr0 gy2-hi m) ; if > or = and foo then more - L means logical means unsigned
1032 (blt @done) ; if < done
1033 (bne @more) ; if = test lo
1034 (mullw gy2-lo guess y2)
1035 (lwz tem x2-idx tsp) ; get x2
1036 (cmpl :cr0 gy2-lo tem)
1037 (ble @done)
1038 @more
1039 (subi guess guess 1)
1040 (b @loop)
1041 @done
1042 (digit-h temp0 guess)
1043 (vpush temp0)
1044 (digit-l temp0 guess)
1045 (vpush temp0)
1046 (la temp0 20 vsp)
1047 (lwz tsp 0 tsp)
1048 (set-nargs 2)
1049 (ba .spvalues)))
1050
1051(defppclapfunction normalize-bignum-loop ((sign arg_x)(res arg_y)(len arg_z))
1052 (let ((idx imm0)
1053 (usign imm1)
1054 (val imm2))
1055 (unbox-fixnum usign sign)
1056 (cmpwi len 0)
1057 (addi idx len (- ppc32::misc-data-offset 4))
1058 (beqlr) ; huh - can this ever happen?
1059 @loop
1060 (lwzx val res idx)
1061 (cmpw val usign)
1062 (subi idx idx '1)
1063 (bne @neq)
1064 (subic. len len '1)
1065 (bgt @loop)
1066 ; fall through - its all sign - return 1
1067 (li arg_z '1)
1068 (blr)
1069 @neq
1070 (rlwinm usign usign 0 0 0) ; hi bit
1071 (rlwinm val val 0 0 0)
1072 (cmpw usign val) ; is hi bit = sign, if so then done
1073 (beqlr)
1074 (addi len len '1) ; if not, need 1 more
1075 (blr)))
1076
1077(defppclapfunction %normalize-bignum-2 ((fixp arg_y)(res arg_z))
1078 (let ((idx imm0)
1079 (usign imm1)
1080 (val imm2)
1081 (len arg_x)
1082 (oldlen temp0))
1083 (lwz imm4 (- ppc32::fulltag-misc) res)
1084 (header-length len imm4)
1085 (cmpwi len 0)
1086 (mr oldlen len)
1087 (addi idx len (- ppc32::misc-data-offset 4))
1088 (beqlr) ; huh - can this ever happen?
1089 (lwzx val res idx) ; high order word
1090 (srawi usign val 31) ; get sign
1091 @loop
1092 (lwzx val res idx)
1093 (cmpw val usign)
1094 (subi idx idx '1)
1095 (bne @neq)
1096 (subic. len len '1)
1097 (bgt @loop)
1098 ; fall through - its all sign - return 1
1099 (li len '1)
1100 (rlwinm usign usign 0 0 0) ; hi bit
1101 (b @more)
1102 @neq
1103 (rlwinm usign usign 0 0 0) ; hi bit
1104 (rlwinm val val 0 0 0)
1105 (cmpw usign val) ; is hi bit = sign, if so then done
1106 (beq @more)
1107 (addi len len '1) ; if not, need 1 more
1108 (b @big)
1109 @more
1110 (cmpwi :cr1 fixp (target-nil-value))
1111 (cmpwi len '1)
1112 (beq :cr1 @big) ; dont return fixnum
1113 (bgt @big)
1114 ;; stuff for maybe fixnum
1115 ;(dbg t)
1116 (lwz val ppc32::misc-data-offset res)
1117 (rlwinm imm4 val 0 0 2) ; hi 3 bits same? - we assume fixnumshift is 2
1118 (srawi usign usign 2)
1119 (cmpw usign imm4)
1120 (bne @big)
1121 (box-fixnum arg_z val)
1122 (blr)
1123 @big
1124 (cmpw oldlen len)
1125 (beqlr) ; same length - done
1126 (li imm4 ppc32::subtag-bignum) ; set new length
1127 (rlwimi imm4 len (- ppc32::num-subtag-bits ppc32::fixnumshift) 0 (- 31 ppc32::num-subtag-bits))
1128 (stw imm4 ppc32::misc-header-offset res)
1129 ; 0 to tail if negative
1130 (cmpwi usign 0)
1131 (beqlr)
1132 ; zero from len inclusive to oldlen exclusive
1133 ;(dbg t)
1134 (addi idx len ppc32::misc-data-offset)
1135 @loop2
1136 (stwx rzero idx res)
1137 (addi len len '1)
1138 (cmpw len oldlen)
1139 (addi idx idx '1)
1140 (blt @loop2)
1141 (blr)))
1142
1143(defppclapfunction %count-digit-leading-zeros ((high arg_y) (low arg_z))
1144 (compose-digit imm0 high low)
1145 (cntlzw imm0 imm0)
1146 (box-fixnum arg_z imm0)
1147 (blr))
1148
1149(defppclapfunction %count-digit-trailing-zeros ((high arg_y) (low arg_z))
1150 (compose-digit imm0 high low)
1151 (neg imm1 imm0)
1152 (and imm0 imm0 imm1)
1153 (cntlzw imm0 imm0)
1154 (subfic imm0 imm0 31)
1155 (box-fixnum arg_z imm0)
1156 (blr))
1157
1158
1159(defppclapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
1160 (let ((ndigits arg_x)
1161 (nbits arg_y)
1162 (digit imm0)
1163 (ptr imm1))
1164 (li ptr ppc32::misc-data-offset)
1165 (li ndigits '-32)
1166 @next
1167 (lwzx digit bignum ptr)
1168 (cmpwi digit 0)
1169 (la ptr 4 ptr)
1170 (addi ndigits ndigits '32)
1171 (beq @next)
1172 (neg ptr digit)
1173 (and digit digit ptr)
1174 (cntlzw digit digit)
1175 (subfic digit digit 31)
1176 (box-fixnum nbits digit)
1177 (add arg_z nbits ndigits)
1178 (blr)))
1179
1180
1181(defppclapfunction %bignum-trim-leading-zeros ((bignum arg_x)
1182 (start arg_y)
1183 (len arg_z))
1184 (add imm1 start len)
1185 (la imm1 (- ppc32::misc-data-offset 4) imm1)
1186 @loop
1187 (cmpwi cr0 len '1)
1188 (lwzx imm0 bignum imm1)
1189 (cmpwi cr1 imm0 0)
1190 (la imm1 -4 imm1)
1191 (bnelr cr1)
1192 (la len '-1 len)
1193 (bne @loop)
1194 (blr))
1195
1196;;; Set length of bignum to new-len (zeroing out any trailing words between
1197;;; the old length and the new.
1198(defppclapfunction %shrink-bignum ((new-len arg_y) (bignum arg_z))
1199 (let ((old-len temp0)
1200 (old-idx imm0)
1201 (new-idx imm2)
1202 (header imm1))
1203 (getvheader header bignum)
1204 (header-length old-len header)
1205 (cmpw old-len new-len)
1206 (la old-idx ppc32::misc-data-offset old-len)
1207 (la new-idx ppc32::misc-data-offset new-len)
1208 (beqlr)
1209 @loop
1210 (subi old-idx old-idx 4)
1211 (cmpw old-idx new-idx)
1212 (stwx ppc32::rzero bignum old-idx)
1213 (bne @loop)
1214 (slwi header new-len (- ppc32::num-subtag-bits ppc32::fixnumshift))
1215 (ori header header ppc32::subtag-bignum)
1216 (stw header ppc32::misc-header-offset bignum)
1217 (blr)))
1218
1219;;; Especially when large operands are involved, the GNU Multiple Precision
1220;;; library's algorithm's are often faster than Clozure CL's. GMP's MPN
1221;;; library defines operations on "limb vectors", which are basically
1222;;; just sequences of 32-bit digits (least-significant digit first), which
1223;;; is just about exactly the same way that Clozure CL stores bignums.
1224;;; We might want to (eventually) link some or all of GMP into Clozure CL;
1225;;; in the meantime, it seems that we get some performance benefit from
1226;;; using GMP representation and algorithms in some mixture of LAP and Lisp.
1227;;; To approximate the "limb vector" representation, we copy operands to
1228;;; (and results from) stack-allocated macptrs. Since those macptrs are
1229;;; word-aligned, we can use fixnums to represent word-aligned pointers.
1230;;; Obviously, it costs a little to copy back and forth like this; we
1231;;; only win when operands are fairly large, and when we can replace an
1232;;; N^2 algorithm with something cheaper.
1233
1234;;; Macptr MUST be word-aligned (low 2 bits must be 0). Extract
1235;;; such an address, return it as a fixnum.
1236(defppclapfunction macptr->fixnum ((ptr arg_z))
1237 (macptr-ptr arg_z ptr)
1238 (blr))
1239
1240;;; Copy the limb SRC points to to where DEST points.
1241(defppclapfunction copy-limb ((src arg_y) (dest arg_z))
1242 (lwz imm0 0 src)
1243 (stw imm0 0 dest)
1244 (blr))
1245
1246;;; Return T iff LIMB contains 0.
1247(defppclapfunction limb-zerop ((limb arg_z))
1248 (lwz imm0 0 limb)
1249 (cntlzw imm0 imm0)
1250 (srwi imm0 imm0 5)
1251 (bit0->boolean arg_z imm0 imm0)
1252 (blr))
1253
1254;;; Return -1,0,1 according to whether the contents of Y are
1255;;; <,=,> the contents of Z.
1256(defppclapfunction compare-limbs ((y arg_y) (z arg_z))
1257 (lwz imm1 0 z)
1258 (lwz imm0 0 y)
1259 (cmplw imm0 imm1)
1260 (li arg_z 0)
1261 (beqlr)
1262 (li arg_z '1)
1263 (bgtlr)
1264 (li arg_z '-1)
1265 (blr))
1266
1267;;; Add a fixnum to the limb LIMB points to. Ignore overflow.
1268(defppclapfunction add-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1269 (unbox-fixnum imm0 fixnum)
1270 (lwz imm1 0 limb)
1271 (add imm1 imm1 imm0)
1272 (stw imm1 0 limb)
1273 (blr))
1274
1275;;; Store a fixnum value where LIMB points.
1276(defppclapfunction copy-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1277 (unbox-fixnum imm0 fixnum)
1278 (stwu imm0 0 limb)
1279 (blr))
1280
1281;;; Increment a "LIMB VECTOR" (bignum) by a small amount. The caller
1282;;; knows that carries will only propagate for a word or two.
1283(defppclapfunction mpn-incr-u ((limb arg_y) (fixby arg_z))
1284 (let ((by imm0)
1285 (sum imm1))
1286 (unbox-fixnum by fixby)
1287 @loop
1288 (lwz sum 0 limb)
1289 (add sum sum by)
1290 (cmplw sum by)
1291 (stw sum 0 limb)
1292 (li by 1)
1293 (la limb 4 limb)
1294 (blt @loop)
1295 (blr)))
1296
1297;;; Store XP-YP at WP; return carry (0 or 1).
1298;;; wp, xp, yp: word-aligned, unboxed ptrs (fixnums)
1299;;; size: boxed fixnum
1300;;; returns boxed carry
1301(defppclapfunction mpn-sub-n ((wp 0) (xp arg_x) (yp arg_y) (size arg_z))
1302 (vpop imm0)
1303 (subi size size '1)
1304 (cmpwi size 0)
1305 (lwz imm3 0 xp)
1306 (lwz imm4 0 yp)
1307 (sub imm1 xp imm0) ; imm1 = xp-wp
1308 (sub imm2 yp imm0) ; imm2 = yp-wp
1309 (addi imm1 imm1 4) ; imm1 = xp-wp+4
1310 (addi imm2 imm2 4) ; imm2 = yp-wp+4
1311 (subfc imm3 imm4 imm3)
1312 (stw imm3 0 imm0) ; wp[0]
1313 (beq @done)
1314 @top
1315 (subi size size '1)
1316 (cmpwi size 0)
1317 (lwzx imm3 imm1 imm0) ; imm3 = xp[i]
1318 (lwzx imm4 imm2 imm0) ; imm4 = xp[i]
1319 (subfe imm3 imm4 imm3)
1320 (stwu imm3 4 imm0)
1321 (bne @top)
1322 @done
1323 (subfe imm0 rzero rzero)
1324 (subfic imm0 imm0 0)
1325 (box-fixnum arg_z imm0)
1326 (blr))
1327
1328;;; Store XP+YP at WP; return carry (0 or 1).
1329;;; wp, xp, yp = word-aligned, unboxed macptrs (fixnums).
1330;;; size = boxed fixnum
1331;;; result = boxed carry
1332(defppclapfunction mpn-add-n ((wp 0) (xp arg_x) (yp arg_y) (size arg_z))
1333 (vpop imm0)
1334 (subi size size '1)
1335 (cmpwi size 0)
1336 (lwz imm3 0 xp)
1337 (lwz imm4 0 yp)
1338 (sub imm1 xp imm0) ; imm1 = xp-wp
1339 (sub imm2 yp imm0) ; imm2 = yp-wp
1340 (addi imm1 imm1 4) ; imm1 = xp-wp+4
1341 (addi imm2 imm2 4) ; imm2 = yp-wp+4
1342 (addc imm3 imm3 imm4)
1343 (stw imm3 0 imm0) ; wp[0]
1344 (beq @done)
1345 @top
1346 (subi size size '1)
1347 (cmpwi size 0)
1348 (lwzx imm3 imm1 imm0) ; imm3 = xp[i]
1349 (lwzx imm4 imm2 imm0) ; imm4 = xp[i]
1350 (adde imm3 imm4 imm3)
1351 (stwu imm3 4 imm0)
1352 (bne @top)
1353 @done
1354 (addze imm0 rzero)
1355 (box-fixnum arg_z imm0)
1356 (blr))
1357
1358;;; Add the single limb LIMB to S1P (propagating carry.) Store the
1359;;; result at RP. RP and S1P may be the same place, so check for
1360;;; that and do nothing after carry stops propagating. Return carry.
1361(defppclapfunction mpn-add-1 ((rp-offset 0) (s1p arg_x) (size arg_y) (limb arg_z))
1362 (let ((rp temp0))
1363 (vpop rp)
1364 (subi size size '1)
1365 (cmpwi cr2 size 0)
1366 (cmpw cr1 rp s1p) ;a common case
1367 (subi rp rp 4)
1368 (subi s1p s1p 4)
1369 (lwz imm0 0 limb)
1370 (lwzu imm1 4 s1p)
1371 (addc imm1 imm1 imm0)
1372 (addze. imm0 rzero)
1373 (stwu imm1 4 rp)
1374 (beq cr2 @done)
1375 @top
1376 (beq cr0 @finish) ; branch if no more carry
1377 (subi size size '1)
1378 (cmpwi cr2 size 0)
1379 (lwzu imm1 4 s1p)
1380 (addc imm1 imm1 imm0)
1381 (addze. imm0 rzero)
1382 (stwu imm1 4 rp)
1383 (bne cr2 @top)
1384 (box-fixnum arg_z imm0)
1385 (blr)
1386 @finish
1387 (beq cr1 @done)
1388 @loop
1389 (subi size size '1)
1390 (cmpwi cr2 size 0)
1391 (lwzu imm1 4 s1p)
1392 (stwu imm1 4 rp)
1393 (bne cr2 @loop)
1394 @done
1395 (box-fixnum arg_z imm0)
1396 (blr)))
1397;;; Multiply the limb vector S1 by the single limb at LIMBPTR, storing
1398;;; the result at RES. Store the "carry out" (high word of last 64-bit
1399;;; partial product) at the limb RESULT.
1400;;; res, s1, limbptr, result:
1401;;; unboxed, word-aligned ptrs (fixnums). size: boxed fixnum
1402;;; It'd be hard to transliterate the GMP code here; the GMP version
1403;;; uses lots more immediate registers than we can easily use in LAP
1404;;; (and is much more aggressively pipelined).
1405(defppclapfunction mpn-mul-1 ((res-offset 4)
1406 (s1-offset 0)
1407 (size arg_x)
1408 (limbptr arg_y)
1409 (result arg_z))
1410 (let ((limb imm0)
1411 (resptr temp0)
1412 (s1 temp1)
1413 (src imm1)
1414 (prod-low imm2)
1415 (prod-high imm3)
1416 (carry imm4))
1417 (lwz resptr res-offset vsp)
1418 (lwz s1 s1-offset vsp)
1419 (la vsp 8 vsp)
1420 (la resptr -4 resptr) ; pre-decrement
1421 (la s1 -4 s1)
1422 (addic carry carry 0)
1423 (li carry 0)
1424 (lwz limb 0 limbptr)
1425 @loop
1426 (subi size size '1)
1427 (cmpwi size 0)
1428 (lwzu src 4 s1)
1429 (mulhwu prod-high src limb)
1430 (mullw prod-low src limb)
1431 (addc prod-low prod-low carry)
1432 (addze carry prod-high)
1433 (stwu prod-low 4 resptr)
1434 (bne @loop)
1435 (stw carry 0 result)
1436 (blr)))
1437
1438;;; multiply s1*limb and add result to res
1439;;; res, s1, limbptr, result:
1440;;; unboxed, word-aligned ptrs (fixnums).
1441;;; size: boxed fixnum
1442;;; limbptr: source "limb".
1443;;; result: carry out (high word of product).
1444(defppclapfunction mpn-addmul-1 ((res-offset 4)
1445 (s1-offset 0)
1446 (size arg_x)
1447 (limbptr arg_y)
1448 (result arg_z))
1449 (let ((limb imm0)
1450 (resptr temp0)
1451 (s1 temp1)
1452 (src imm1)
1453 (prod-low imm2)
1454 (prod-high imm3)
1455 (carry imm4)
1456 (prev imm4))
1457 (lwz resptr res-offset vsp)
1458 (lwz s1 s1-offset vsp)
1459 (la vsp 8 vsp)
1460 (la resptr -4 resptr) ; pre-decrement
1461 (la s1 -4 s1)
1462 (addic carry carry 0)
1463 (li carry 0)
1464 (lwz limb 0 limbptr)
1465 @loop
1466 (subi size size '1)
1467 (cmpwi size 0)
1468 (lwzu src 4 s1)
1469 (mulhwu prod-high src limb)
1470 (mullw prod-low src limb)
1471 (addc prod-low prod-low carry)
1472 (addze prod-high prod-high)
1473 (lwz prev 4 resptr)
1474 (addc prev prev prod-low)
1475 (stwu prev 4 resptr)
1476 (addze carry prod-high)
1477 (bne @loop)
1478 (stw carry 0 result)
1479 (blr)))
1480
1481;;; Multiply the UN-word limb vector at UP and the VN-word limb vector
1482;;; at VP, store the result at RP.
1483(defppclapfunction mpn-mul-basecase ((rp-offset 4)
1484 (up-offset 0)
1485 (un arg_x)
1486 (vp arg_y)
1487 (vn arg_z))
1488 (let ((resptr temp0)
1489 (s1 temp1)
1490 (up temp2)
1491 (rp temp3)
1492 (size nargs)
1493 (limb imm0)
1494 (src imm1)
1495 (prod-low imm2)
1496 (prod-high imm3)
1497 (prev imm4)
1498 (carry imm4))
1499 (lwz resptr rp-offset vsp)
1500 (la rp -4 resptr)
1501 (lwz up up-offset vsp)
1502 (la s1 -4 up)
1503 (la vsp 8 vsp)
1504 (mr size un)
1505 (lwz limb 0 vp)
1506 (subi vn vn '1)
1507 (cmpwi cr2 vn 0)
1508 (li carry 0)
1509 @mul-1-loop
1510 (subi size size '1)
1511 (cmpwi size 0)
1512 (lwzu src 4 s1)
1513 (mulhwu prod-high src limb)
1514 (mullw prod-low src limb)
1515 (addc prod-low prod-low carry)
1516 (addze carry prod-high)
1517 (stwu prod-low 4 rp)
1518 (bne @mul-1-loop)
1519 (stw carry 4 rp)
1520 @again
1521 (beq cr2 @done)
1522 (subi vn vn '1)
1523 (cmpwi cr2 vn 0)
1524 (mr rp resptr)
1525 (la resptr 4 resptr)
1526 (la s1 -4 up)
1527 (lwzu limb 4 vp)
1528 (mr size un)
1529 (addic carry carry 0)
1530 (li carry 0)
1531 @addmul-1-loop
1532 (subi size size '1)
1533 (cmpwi size 0)
1534 (lwzu src 4 s1)
1535 (mulhwu prod-high src limb)
1536 (mullw prod-low src limb)
1537 (addc prod-low prod-low carry)
1538 (addze prod-high prod-high)
1539 (lwz prev 4 rp)
1540 (addc prev prev prod-low)
1541 (stwu prev 4 rp)
1542 (addze carry prod-high)
1543 (bne @addmul-1-loop)
1544 (stw carry 4 rp)
1545 (b @again)
1546 @done
1547 (li arg_z (target-nil-value))
1548 (blr)))
1549
1550;;; left-shift src by 1 bit, storing result at res. Return
1551;;; the bit that was shifted out.
1552(defppclapfunction mpn-lshift-1 ((resptr arg_x) (s1ptr arg_y) (size-arg arg_z))
1553 (let ((size temp0)
1554 (last-bit imm0)
1555 (prev imm1)
1556 (curr imm2)
1557 (sleft imm3)
1558 (sright imm4))
1559 (subi size size-arg '1)
1560 (cmpwi size 0)
1561 (add resptr resptr size-arg)
1562 (add s1ptr s1ptr size-arg)
1563 (lwzu prev -4 s1ptr)
1564 (srwi last-bit prev 31)
1565 (box-fixnum arg_z last-bit)
1566 (beq @end1)
1567 @loop
1568 (subi size size '1)
1569 (cmpwi size 0)
1570 (lwzu curr -4 s1ptr)
1571 (slwi sleft prev 1)
1572 (srwi sright curr 31)
1573 (or sright sright sleft)
1574 (stwu sright -4 resptr)
1575 (beq @end2)
1576 (subi size size '1)
1577 (cmpwi size 0)
1578 (lwzu prev -4 s1ptr)
1579 (slwi sleft curr 1)
1580 (srwi sright prev 31)
1581 (or sright sright sleft)
1582 (stwu sright -4 resptr)
1583 (bne @loop)
1584 @end1
1585 (slwi sleft prev 1)
1586 (stwu sleft -4 resptr)
1587 (blr)
1588 @end2
1589 (slwi sleft curr 1)
1590 (stwu sleft -4 resptr)
1591 (blr)))
1592
1593;;; Do a 32x32=64 unsigned multiply of the words at X and Y. Store
1594;;; result (low word first) at RESULT.
1595(defppclapfunction umulppm ((x arg_x) (y arg_y) (result arg_z))
1596 (lwz imm0 0 x)
1597 (lwz imm1 0 y)
1598 (mullw imm2 imm0 imm1)
1599 (mulhwu imm3 imm0 imm1)
1600 (stw imm2 0 result)
1601 (stw imm3 4 result)
1602 (blr))
1603
1604
1605;;; for truncate-by-fixnum etal
1606;;; doesnt store quotient - just returns rem in 2 halves
1607(defppclapfunction %floor-loop-no-quo ((q arg_x)(yhi arg_y)(ylo arg_z))
1608 (let ((a imm1)
1609 (b imm2)
1610 (y imm3)
1611 (quo imm0)
1612 (qidx temp0)
1613 (qlen temp1))
1614 (lwz imm4 (- ppc32::fulltag-misc) q)
1615 (header-length qlen imm4)
1616 (subi qidx qlen 4)
1617 (mr b rzero)
1618 (compose-digit y yhi ylo)
1619 @loop
1620 (rlwinm a b -16 16 31)
1621 (rlwinm b b 16 0 15)
1622 (la imm4 ppc32::misc-data-offset q)
1623 (lwzx imm4 qidx imm4) ; q contents
1624 (rlwimi b imm4 16 16 31) ; hi 16 to lo b
1625 ;(dbg)
1626 (48x32-divide a b y fp0 fp1 fp2 imm4)
1627 (fctiwz fp0 fp0)
1628 (stwu tsp -32 tsp)
1629 (stw tsp 4 tsp)
1630 (stfd fp0 24 tsp)
1631 (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
1632 ; now mul quo by y
1633 (mullw imm4 y quo)
1634 ; and subtract from a,b
1635 (subfc b imm4 b)
1636 ; new a and b are low 2 digits of this (b) and last digit in array
1637 ; and do it again on low 3 digits
1638 ;(dbg)
1639 (rlwinm a b -16 16 31)
1640 (rlwinm b b 16 0 15)
1641 (la imm4 ppc32::misc-data-offset q)
1642 (lwzx imm4 qidx imm4)
1643 (rlwimi b imm4 0 16 31)
1644 (48x32-divide a b y fp0 fp1 fp2 imm4)
1645 (fctiwz fp0 fp0)
1646 (stfd fp0 16 tsp) ; quo lo
1647 (subi qidx qidx 4)
1648 (cmpwi :cr1 qidx 0)
1649 (lwz quo (+ 16 4) tsp)
1650 (lwz tsp 0 tsp)
1651 (mullw imm4 y quo)
1652 (subfc b imm4 b) ; b is remainder
1653 (bge :cr1 @loop)
1654 (digit-h temp0 b)
1655 (vpush temp0)
1656 (digit-l temp0 b)
1657 (vpush temp0)
1658 (la temp0 8 vsp)
1659 (set-nargs 2)
1660 (ba .SPvalues)))
1661
1662
1663; store result in dest, return rem in 2 halves
1664(defppclapfunction %floor-loop-quo ((q-stk 0)(dest arg_x)(yhi arg_y)(ylo arg_z))
1665 (let ((a imm1)
1666 (b imm2)
1667 (y imm3)
1668 (quo imm0)
1669 (qidx temp0)
1670 (qlen temp1)
1671 (q temp2))
1672 (vpop q)
1673 (lwz imm4 (- ppc32::fulltag-misc) q)
1674 (header-length qlen imm4)
1675 (subi qidx qlen 4)
1676 (mr b rzero)
1677 (compose-digit y yhi ylo)
1678 @loop
1679 (rlwinm a b -16 16 31)
1680 (rlwinm b b 16 0 15)
1681 (la imm4 ppc32::misc-data-offset q)
1682 (lwzx imm4 qidx imm4) ; q contents
1683 (rlwimi b imm4 16 16 31) ; hi 16 to lo b
1684 (48x32-divide a b y fp0 fp1 fp2 imm4)
1685 (fctiwz fp0 fp0)
1686 (stwu tsp -32 tsp)
1687 (stw tsp 4 tsp)
1688 (stfd fp0 24 tsp)
1689 (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
1690 ; now mul quo by y
1691 (mullw imm4 y quo)
1692 ; and subtract from a,b
1693 (subfc b imm4 b)
1694 ; new a and b are low 2 digits of this (b) and last digit in array
1695 ; and do it again on low 3 digits
1696 ;(dbg)
1697 (rlwinm a b -16 16 31)
1698 (rlwinm b b 16 0 15)
1699 (la imm4 ppc32::misc-data-offset q)
1700 (lwzx imm4 qidx imm4)
1701 (rlwimi b imm4 0 16 31)
1702 (48x32-divide a b y fp0 fp1 fp2 imm4)
1703 (fctiwz fp0 fp0)
1704 (stfd fp0 16 tsp) ; quo lo
1705 (lwz quo (+ 16 4) tsp)
1706 (mullw imm4 y quo)
1707 (subfc b imm4 b) ; b is remainder
1708 (lwz quo (+ 24 4) tsp) ; quo-hi
1709 (rlwinm quo quo 16 0 15)
1710 (lwz imm4 (+ 16 4) tsp) ; quo lo
1711 (lwz tsp 0 tsp)
1712 (rlwimi quo imm4 0 16 31)
1713 (la imm4 ppc32::misc-data-offset dest)
1714 (stwx quo qidx imm4)
1715 (subic. qidx qidx 4)
1716 (bge @loop)
1717 (digit-h temp0 b)
1718 (vpush temp0)
1719 (digit-l temp0 b)
1720 (vpush temp0)
1721 (la temp0 8 vsp)
1722 (set-nargs 2)
1723 (ba .SPvalues)))
1724
1725;;; get xidx thing from x, yidx thing from y if same return #xffff
1726;;; #xffff otherwise get another thing from x and 1- xidx and do as
1727;;; %floor of xthing otherx ything
1728;;; Huh?
1729(defppclapfunction %floor-99 ((x-stk 0)(xidx arg_x)(yptr arg_y)(yidx arg_z))
1730 (let ((xptr temp0)
1731 (a imm1)
1732 (b imm2)
1733 (y imm3)
1734 (quo imm0))
1735 (vpop xptr)
1736 (la imm4 ppc32::misc-data-offset XIDX)
1737 (lwzx a xptr imm4)
1738 (la imm4 ppc32::misc-data-offset YIDX)
1739 (lwzx y yptr imm4)
1740 (cmpw a y)
1741 (bne @more)
1742 (li imm4 #xffff)
1743 (rlwinm imm4 imm4 ppc32::fixnumshift (- 16 ppc32::fixnumshift) (- 31 ppc32::fixnum-shift))
1744 (vpush imm4)
1745 (vpush imm4)
1746 (la temp0 8 vsp)
1747 (set-nargs 2)
1748 (ba .spvalues)
1749 @MORE
1750 ; a has 16 bits from ahi, bhi gets alo blo gets bhi
1751 (la imm4 (- ppc32::misc-data-offset 4) xidx)
1752 (lwzx b xptr imm4)
1753 (rlwinm b b 16 16 31) ; bhi to blo
1754 (rlwimi b a 16 0 15) ; alo to bhi
1755 (rlwinm a a 16 16 31) ; a gets alo
1756 (48x32-divide a b y fp0 fp1 fp2 imm4)
1757 (fctiwz fp0 fp0)
1758 (stwu tsp -32 tsp)
1759 (stw tsp 4 tsp)
1760 (stfd fp0 24 tsp)
1761 (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
1762 ; now mul quo by y
1763 (mullw imm4 y quo)
1764 ; and subtract from a,b
1765 (subfc b imm4 b)
1766 ; AND AGAIN
1767 (rlwinm a b -16 16 31) ; a gets b hi
1768 (rlwinm b b 16 0 15) ; b lo to b hi
1769 (la imm4 (- ppc32::misc-data-offset 4) xidx)
1770 (lwzx imm4 imm4 xptr)
1771 (rlwimi b imm4 0 16 31)
1772 (48x32-divide a b y fp0 fp1 fp2 imm4)
1773 (fctiwz fp0 fp0)
1774 (stfd fp0 16 tsp) ; quo lo
1775 (lwz quo (+ 24 4) tsp) ; quo-hi
1776 (box-fixnum temp0 quo)
1777 (vpush temp0)
1778 (lwz quo (+ 16 4) tsp) ; quo lo
1779 (lwz tsp 0 tsp)
1780 (box-fixnum temp0 quo)
1781 (vpush temp0)
1782 (la temp0 8 vsp)
1783 (set-nargs 2)
1784 (ba .SPvalues)))
1785
1786; End of ppc32-bignum.lisp
Note: See TracBrowser for help on using the repository browser.