close Warning: Can't use blame annotator:
No changeset 1633 in the repository

source: release/1.11/source/level-0/PPC/PPC32/ppc32-bignum.lisp

Last change on this file was 16688, checked in by R. Matthew Emerson, 9 years ago

Merge copyright/license header changes to 1.11 release branch.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 47.9 KB
RevLine 
1;;-*- Mode: Lisp; Package: CCL -*-
2;;;
3;;; Copyright 1994-2009 Clozure Associates
4;;;
5;;; Licensed under the Apache License, Version 2.0 (the "License");
6;;; you may not use this file except in compliance with the License.
7;;; You may obtain a copy of the License at
8;;;
9;;; http://www.apache.org/licenses/LICENSE-2.0
10;;;
11;;; Unless required by applicable law or agreed to in writing, software
12;;; distributed under the License is distributed on an "AS IS" BASIS,
13;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14;;; See the License for the specific language governing permissions and
15;;; limitations under the License.
16
17
18(in-package "CCL")
19
20(eval-when (:compile-toplevel :execute)
21 (require "PPC32-ARCH")
22 (require "PPC-LAPMACROS")
23
24 ;; Set RES to 1 if (u< x y), to 0 otherwise.
25 (defppclapmacro sltu (res x y)
26 `(progn
27 (subfc ,res ,x ,y)
28 (subfe ,res ,res ,res)
29 (neg ,res ,res)))
30
31 (defppclapmacro 48x32-divide (x-hi16 x-lo y freg temp-freg freg2 immx)
32 `(let ((temp 16)
33 (temp.h 16)
34 (temp.l 20)
35 (zero 8)
36 (zero.h 8)
37 (zero.l 12))
38 (stwu tsp -24 tsp)
39 (stw tsp 4 tsp)
40 (lwi ,immx #x43300000) ; 1075 = 1022+53
41 (stw ,immx zero.h tsp)
42 (stw rzero zero.l tsp)
43 (lfd ,temp-freg zero tsp)
44 (rlwimi ,immx ,x-hi16 0 16 31)
45 (stw ,immx temp.h tsp)
46 (stw ,x-lo temp.l tsp)
47 (lfd ,freg temp tsp)
48
49 (fsub ,freg ,freg ,temp-freg)
50 (lwi ,immx #x43300000)
51 (stw ,immx temp.h tsp)
52 (stw ,y temp.l tsp)
53 (lfd ,freg2 temp tsp)
54 (lwz tsp 0 tsp)
55 (fsub ,freg2 ,freg2 ,temp-freg)
56 (fdiv ,freg ,freg ,freg2)
57 ))
58
59 )
60
61;;; %BIGNUM-REF needs to access bignums as obviously as possible, and it needs
62;;; to be able to return 32 bits somewhere no one looks for real objects.
63;;;
64;;; The easiest thing to do is to store the 32 raw bits in two fixnums
65;;; and return multiple values.
66(defppclapfunction %bignum-ref ((bignum arg_y) (i arg_z))
67 (vref32 imm0 bignum i imm1)
68 (digit-h temp0 imm0)
69 (digit-l temp1 imm0)
70 (vpush temp0)
71 (vpush temp1)
72 (la temp0 8 vsp) ; ?? why not (mr temp0 vsp) before vpushing?
73 (set-nargs 2) ; that doesn't make any difference. And, in this case,
74 ; we can get away without setting nargs (since the caller
75 ; called us with 2 args, but that's horrible style.)
76 (ba .SPvalues))
77
78
79;;; Set the 0th element of DEST (a bignum or some other 32-bit ivector)
80;;; to the Ith element of the bignum SRC.
81(defppclapfunction %ref-digit ((bignum arg_x) (i arg_y) (dest arg_z))
82 (la imm1 ppc32::misc-data-offset i)
83 (lwzx imm0 bignum imm1)
84 (stw imm0 ppc32::misc-data-offset dest)
85 (blr))
86
87;;; BIGNUM[I] := DIGIT[0]
88(defppclapfunction %set-digit ((bignum arg_x) (i arg_y) (digit arg_z))
89 (la imm1 ppc32::misc-data-offset i)
90 (lwz imm0 ppc32::misc-data-offset digit)
91 (stwx imm0 bignum imm1)
92 (blr))
93
94;;; Return 0 if the 0th digit in X is 0.
95(defppclapfunction %digit-zerop ((x arg_z))
96 (lwz imm0 ppc32::misc-data-offset x)
97 (cntlzw imm0 imm0)
98 (srwi imm0 imm0 5)
99 (rlwimi imm0 imm0 4 27 27)
100 (addi arg_z imm0 (target-nil-value))
101 (blr))
102
103;;; store the sign of bignum (0 or -1) in the one-word bignum "digit".
104(defppclapfunction %bignum-sign-digit ((bignum arg_y) (digit arg_z))
105 (vector-length imm0 bignum imm0)
106 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
107 (lwzx imm0 bignum imm0)
108 (srawi imm0 imm0 31) ;propagate sign bit
109 (stw imm0 ppc32::misc-data-offset digit)
110 (blr))
111
112;;; Return the sign of bignum (0 or -1) as a fixnum
113(defppclapfunction %bignum-sign ((bignum arg_z))
114 (vector-length imm0 bignum imm0)
115 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
116 (lwzx imm0 bignum imm0)
117 (srawi imm0 imm0 31) ;propagate sign bit
118 (box-fixnum arg_z imm0)
119 (blr))
120
121;;; Count the sign bits in the most significant digit of bignum;
122;;; return fixnum count.
123(defppclapfunction %bignum-sign-bits ((bignum arg_z))
124 (vector-length imm0 bignum imm0)
125 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
126 (lwzx imm0 bignum imm0)
127 (cmpwi imm0 0)
128 (not imm0 imm0)
129 (blt @wasneg)
130 (not imm0 imm0)
131 @wasneg
132 (cntlzw imm0 imm0)
133 (box-fixnum arg_z imm0)
134 (blr))
135
136(defppclapfunction %digit-0-or-plusp ((bignum arg_y) (idx arg_z))
137 (la imm0 ppc32::misc-data-offset idx)
138 (lwzx imm0 bignum imm0)
139 (xoris imm0 imm0 #x8000) ; invert sign bit
140 (srwi imm0 imm0 31)
141 (bit0->boolean arg_z imm0 imm0) ; return T if sign bit was clear before inversion
142 (blr))
143
144;;; For oddp, evenp
145(defppclapfunction %bignum-oddp ((bignum arg_z))
146 (lwz imm0 ppc32::misc-data-offset bignum)
147 (clrlwi imm0 imm0 31)
148 (bit0->boolean arg_z imm0 imm0)
149 (blr))
150
151(defppclapfunction bignum-plusp ((bignum arg_z))
152 (vector-length imm0 bignum imm0)
153 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
154 (lwzx imm0 bignum imm0)
155 (xoris imm0 imm0 #x8000) ; invert sign bit
156 (srwi imm0 imm0 31)
157 (bit0->boolean arg_z imm0 imm0) ; return T if sign bit was clear before inversion
158 (blr))
159
160(defppclapfunction %fixnum-to-bignum-set ((bignum arg_y) (fixnum arg_z))
161 (unbox-fixnum imm0 fixnum)
162 (stw imm0 ppc32::misc-data-offset bignum)
163 (blr))
164
165(defppclapfunction bignum-minusp ((bignum arg_z))
166 (vector-length imm0 bignum imm0)
167 (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
168 (lwzx imm0 bignum imm0)
169 (srwi imm0 imm0 31)
170 (rlwimi imm0 imm0 4 27 27)
171 (addi arg_z imm0 (target-nil-value)) ; return T if sign bit was clear before inversion
172 (blr))
173
174
175;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum).
176;;; Store the result in R[K], and return the outgoing carry.
177;;; If I is NIL, A is a fixnum. If J is NIL, B is a fixnum.
178
179(defppclapfunction %add-with-carry ((r 12) (k 8) (c 4) (a 0) (i arg_x) (b arg_y) (j arg_z))
180 (cmpwi cr1 j (target-nil-value))
181 (cmpwi cr0 i (target-nil-value))
182 (lwz temp0 a vsp)
183 (unbox-fixnum imm1 temp0)
184 (unbox-fixnum imm2 b)
185 (beq cr0 @got-a)
186 (la imm1 ppc32::misc-data-offset i)
187 (lwzx imm1 temp0 imm1)
188 @got-a
189 (beq cr1 @got-b)
190 (la imm2 ppc32::misc-data-offset j)
191 (lwzx imm2 b imm2)
192 @got-b
193 (lwz temp0 c vsp)
194 (unbox-fixnum imm0 temp0)
195 (addic imm0 imm0 -1)
196 (lwz temp1 r vsp)
197 (lwz temp0 k vsp)
198 (la vsp 16 vsp)
199 (adde imm0 imm1 imm2)
200 (la imm2 ppc32::misc-data-offset temp0)
201 (stwx imm0 temp1 imm2)
202 (addze imm0 rzero)
203 (box-fixnum arg_z imm0)
204 (blr))
205
206
207
208
209
210;;; Store the result of A[I] - B[J] - borrow into R[K], returning the borrow.
211;;; If I is NIL, A is a fixnum; likewise for J and B.
212(defppclapfunction %subtract-with-borrow ((r 12) (k 8) (borrow 4) (a 0) (i arg_x) (b arg_y) (j arg_z))
213 (cmpwi cr0 i (target-nil-value))
214 (cmpwi cr1 j (target-nil-value))
215 (lwz temp0 a vsp)
216 (unbox-fixnum imm2 b)
217 (unbox-fixnum imm1 temp0)
218 (beq cr1 @got-b)
219 (la imm2 ppc32::misc-data-offset j)
220 (lwzx imm2 b imm2)
221 @got-b
222 (beq cr0 @got-a)
223 (la imm1 ppc32::misc-data-offset i)
224 (lwzx imm1 temp0 imm1)
225 @got-a
226 (lwz temp0 borrow vsp)
227 (unbox-fixnum imm0 temp0)
228 (addic imm0 imm0 -1)
229 (lwz temp0 r vsp)
230 (lwz temp1 k vsp)
231 (la vsp 16 vsp)
232 (subfe imm0 imm2 imm1)
233 (la imm1 ppc32::misc-data-offset temp1)
234 (stwx imm0 temp0 imm1)
235 (addze imm0 rzero)
236 (box-fixnum arg_z imm0)
237 (blr))
238
239;; multiply i'th digit of x by y and add to result starting at digit i
240(defppclapfunction %multiply-and-add-harder-loop-2
241 ((x-ptr 4) (y-ptr 0) (resptr arg_x)(residx arg_y) (count arg_z))
242 (let ((tem imm0)
243 (y imm1)
244 (prod-h imm2)
245 (prod-l imm3)
246 (x imm4)
247 (xptr temp2)
248 (yidx temp1)
249 (yptr temp0))
250 (lwz xptr x-ptr vsp)
251 (la tem ppc32::misc-data-offset residx)
252 (lwzx x xptr tem)
253 (lwz yptr y-ptr vsp)
254 (li yidx 0) ; init yidx 0
255 (addc prod-h rzero rzero) ; init carry 0, mumble 0
256 @loop
257 (subi count count '1)
258 (cmpwi count 0)
259 (la tem ppc32::misc-data-offset yidx) ; get yidx
260 (lwzx y yptr tem)
261 (mullw prod-l x y)
262 (addc prod-l prod-l prod-h)
263 (mulhwu prod-h x y)
264 (addze prod-h prod-h)
265 (la tem ppc32::misc-data-offset residx)
266 (lwzx y resptr tem)
267 (addc prod-l prod-l y)
268 (addze prod-h prod-h)
269 (stwx prod-l resptr tem)
270 (addi residx residx '1)
271 (addi yidx yidx '1)
272 (bgt @loop)
273 (la tem ppc32::misc-data-offset residx)
274 (stwx prod-h resptr tem)
275 (la vsp 8 vsp)
276 (blr)))
277
278
279
280;;; Multiply X[I] by the unboxed value of the (non-negative) fixnum Y;
281;;; add the incoming carry from CARRY[0] to the 64-bit product. Store
282;;; the low word of the 64-bit sum in R[0] and the high word in
283;;; CARRY[0].
284
285(defppclapfunction %multiply-and-add ((r 4) (carry 0) (x arg_y) (i arg_x) (y arg_z))
286 (unbox-fixnum imm0 arg_z)
287 (la imm1 ppc32::misc-data-offset i)
288 (lwzx imm1 x imm1)
289 (mulhwu imm2 imm0 imm1)
290 (mullw imm1 imm0 imm1)
291 (lwz temp0 carry vsp)
292 (lwz imm0 ppc32::misc-data-offset temp0)
293 (addc imm1 imm1 imm0)
294 (addze imm2 imm2)
295 (stw imm2 ppc32::misc-data-offset temp0)
296 (lwz arg_z r vsp)
297 (la vsp 8 vsp)
298 (stw imm1 ppc32::misc-data-offset arg_z)
299 (blr))
300
301(defppclapfunction %multiply-and-add-fixnum-loop ((len64 0) (x arg_x) (y arg_y) (result arg_z))
302 (let ((carry imm4)
303 (iidx imm3)
304 (unboxed-y imm0)
305 (i temp0)
306 (hi imm2)
307 (rlen temp1))
308 (vpop rlen)
309 (li carry 0)
310 (li iidx ppc32::misc-data-offset)
311 (li i 0)
312 (b @test)
313 @loop
314 (unbox-fixnum unboxed-y y)
315 (lwzx imm1 x iidx)
316 (mulhwu hi imm1 unboxed-y)
317 (mullw imm0 imm1 unboxed-y)
318 (addc imm0 imm0 carry)
319 (addze carry hi)
320 (stwx imm0 result iidx)
321 (la iidx 4 iidx)
322 (la i '1 i)
323 @test
324 (cmpw i rlen)
325 (blt @loop)
326 (stwx carry result iidx)
327 (blr)))
328
329(defppclapfunction %floor ((q 4) (r 0) (num-high arg_x) (num-low arg_y) (denom-arg arg_z))
330 (let ((rem imm0)
331 (rem-low imm1)
332 (quo imm2)
333 (temp imm3)
334 (denom imm4))
335 (lwz denom ppc32::misc-data-offset denom)
336 (lwz rem ppc32::misc-data-offset num-high)
337 (lwz rem-low ppc32::misc-data-offset num-low)
338 (mr temp denom)
339 (sltu quo rem denom)
340 (subi temp temp quo)
341 (and temp temp denom)
342 (sub rem temp rem)
343 (li temp0 '32)
344 @loop
345 (subi temp0 temp0 '1)
346 (cmpwi temp0 0)
347 (slwi rem rem 1)
348 (srwi temp rem-low 31)
349 (or rem rem temp)
350 (slwi rem-low rem-low 1)
351 (sltu rem rem denom)
352 (slwi quo quo 1)
353 (or quo quo temp)
354 (subi temp temp 1)
355 (and temp temp denom)
356 (sub rem rem temp)
357 (bne @loop)
358 (not quo quo)
359 (lwz temp0 q vsp)
360 (stw quo ppc32::misc-data-offset temp0)
361 (lwz arg_z r vsp)
362 (la vsp 8 vsp)
363 (stw rem ppc32::misc-data-offset arg_z)
364 (blr)))
365
366(defppclapfunction %bignum-ref-hi ((bignum arg_y) (i arg_z))
367 (la imm1 ppc32::misc-data-offset i)
368 (lhzx imm0 bignum imm1)
369 (box-fixnum arg_z imm0)
370 (blr))
371
372
373(defppclapfunction %bignum-set ((bignum 0) (i arg_x) (high arg_y) (low arg_z))
374 (compose-digit imm0 high low)
375 (lwz arg_z bignum vsp)
376 (vset32 imm0 arg_z i imm1)
377 (la vsp 4 vsp)
378 (blr))
379
380
381
382
383; this is silly
384(defppclapfunction %add-the-carry ((b-h arg_x) (b-l arg_y) (carry-in arg_z))
385 (let ((a imm0)
386 (b imm1)
387 (temp imm2)
388 (c imm3))
389 (compose-digit b b-h b-l)
390 (unbox-fixnum c carry-in)
391 (add b c b)
392 (digit-h temp0 b)
393 (digit-l temp1 b)
394 (vpush temp0)
395 (vpush temp1)
396 (la temp0 8 vsp)
397 (set-nargs 2)
398 (ba .SPvalues)))
399
400
401
402
403;;; %SUBTRACT-WITH-BORROW -- Internal.
404;;;
405;;; This should be in assembler, and should not cons intermediate results. It
406;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
407;;; subtracting a possible incoming borrow.
408;;;
409;;; We really do: a - b - 1 + borrow, where borrow is either 0 or 1.
410;;;
411
412(defppclapfunction %subtract-with-borrow-1 ((a-h 4) (a-l 0) (b-h arg_x) (b-l
413arg_y) (borrow-in arg_z))
414 (let ((a imm0)
415 (b imm1)
416 (temp imm2)
417 (c imm3))
418 (lwz temp0 a-h vsp)
419 (lwz temp1 a-l vsp)
420 (compose-digit a temp0 temp1)
421 (compose-digit b b-h b-l)
422 (unbox-fixnum c borrow-in)
423 (li temp -1)
424 (addc temp c temp)
425 (subfe a b a)
426 (addze c rzero)
427 (box-fixnum c c)
428 (digit-h temp0 a)
429 (digit-l temp1 a)
430 (vpush temp0)
431 (vpush temp1)
432 (vpush c)
433 (la temp0 20 vsp)
434 (set-nargs 3)
435 (ba .SPvalues)))
436
437
438
439(defppclapfunction %subtract-one ((a-h arg_y)(a-l arg_z))
440 (let ((a imm0))
441 (compose-digit a a-h a-l)
442 (subi a a 1)
443 (digit-h temp0 a)
444 (vpush temp0)
445 (digit-l temp0 a)
446 (vpush temp0)
447 (la temp0 8 vsp)
448 (set-nargs 2)
449 (ba .spvalues)))
450
451
452
453
454;;; %MULTIPLY-AND-ADD -- Internal.
455;;;
456;;; This multiplies x-digit and y-digit, producing high and low digits
457;;; manifesting the result. Then it adds the low digit, res-digit, and
458;;; carry-in-digit. Any carries (note, you still have to add two digits at a
459;;; time possibly producing two carries) from adding these three digits get
460;;; added to the high digit from the multiply, producing the next carry digit.
461;;; Res-digit is optional since two uses of this primitive multiplies a single
462;;; digit bignum by a multiple digit bignum, and in this situation there is no
463;;; need for a result buffer accumulating partial results which is where the
464;;; res-digit comes from.
465;;; [slh] I assume that the returned carry "digit" can only be 0, 1 or 2
466
467
468(defppclapfunction %multiply-and-add-1 ((x-high 8)
469 (x-low 4)
470 (y-high 0)
471 (y-low arg_x)
472 (carry-in-high arg_y)
473 (carry-in-low arg_z))
474 (let ((x imm0)
475 (y imm1)
476 (carry-in imm2)
477 (lo imm3)
478 (hi imm4))
479 (compose-digit carry-in carry-in-high carry-in-low)
480 (vpop temp0)
481 (compose-digit y temp0 y-low)
482 (vpop temp0)
483 (vpop temp1)
484 (compose-digit x temp1 temp0)
485 (mullw lo x y)
486 (mulhwu hi x y)
487 (addc lo lo carry-in)
488 (addze hi hi)
489 (digit-h temp0 hi)
490 (digit-l temp1 hi)
491 (digit-h temp2 lo)
492 (digit-l temp3 lo)
493 (vpush temp0)
494 (vpush temp1)
495 (vpush temp2)
496 (vpush temp3)
497 (set-nargs 4)
498 (la temp0 16 vsp)
499 (ba .SPvalues)))
500
501
502(defppclapfunction %logcount-complement ((bignum arg_y) (idx arg_z))
503 (let ((arg imm0)
504 (shift imm1)
505 (temp imm2))
506 (la arg ppc32::misc-data-offset idx)
507 (lwzx arg bignum arg)
508 (not. shift arg)
509 (li arg_z 0)
510 (if ne
511 (progn
512 @loop
513 (la temp -1 shift)
514 (and. shift shift temp)
515 (la arg_z '1 arg_z)
516 (bne @loop)))
517 (blr)))
518
519(defppclapfunction %logcount ((bignum arg_y) (idx arg_z))
520 (let ((arg imm0)
521 (shift imm1)
522 (temp imm2))
523 (la arg ppc32::misc-data-offset idx)
524 (lwzx arg bignum arg)
525 (mr. shift arg)
526 (li arg_z 0)
527 (if ne
528 (progn
529 @loop
530 (la temp -1 shift)
531 (and. shift shift temp)
532 (la arg_z '1 arg_z)
533 (bne @loop)))
534 (blr)))
535
536; return res
537(defppclapfunction bignum-add-loop-2 ((aptr arg_x)(bptr arg_y) (result arg_z))
538 (let ((idx imm0)
539 (count imm1)
540 (x imm2)
541 (y imm3)
542 (len-a temp0)
543 (len-b temp1)
544 (tem temp2))
545 (li idx ppc32::misc-data-offset)
546 (lwz imm4 ppc32::misc-header-offset aptr)
547 (header-length len-a imm4)
548 (lwz imm4 ppc32::misc-header-offset bptr)
549 (header-length len-b imm4)
550 ; make a be shorter one
551 (cmpw len-a len-b)
552 (li count 0)
553 ; initialize carry 0
554 (addc x rzero rzero)
555 (ble @loop)
556 ; b shorter - swap em
557 (mr tem len-a)
558 (mr len-a len-b)
559 (mr len-b tem)
560 (mr tem aptr)
561 (mr aptr bptr)
562 (mr bptr tem)
563 @loop
564 (lwzx y aptr idx)
565 (lwzx x bptr idx)
566 (addi count count '1)
567 (cmpw count len-a)
568 (adde x x y)
569 (stwx x result idx)
570 (addi idx idx '1)
571 (blt @loop)
572 ; now propagate carry thru longer (b) using sign of shorter
573 ;(SUBI imm4 idx '1) ; y has hi order word of a
574 ;(lwzx y aptr imm4)
575 (cmpw len-a len-b)
576 (adde imm4 rzero rzero) ; get carry
577 (srawi y y 31) ; p.o.s clobbers carry
578 (addic imm4 imm4 -1) ; restore carry
579 (beq @l3) ; unless equal
580 @loop2
581 (lwzx x bptr idx)
582 (adde x x y)
583 (stwx x result idx)
584 (addi count count '1)
585 (cmpw count len-b)
586 (addi idx idx '1)
587 (blt @loop2)
588 ; y has sign of shorter - get sign of longer to x
589 @l3
590 (subi imm4 idx '1)
591 (lwzx x bptr imm4)
592 (adde imm4 rzero rzero) ; get carry
593 (srawi x x 31) ; clobbers carry
594 (addic imm4 imm4 -1)
595 (adde x x y)
596 (stwx x result idx)
597 (blr)))
598
599;; same as above but with initial a index and finishes
600(defppclapfunction bignum-add-loop-+ ((init-a 0)(aptr arg_x)(bptr arg_y)(length arg_z))
601 (let ((idx imm0)
602 (count imm1)
603 (x imm2)
604 (y imm3)
605 (aidx imm4))
606 (li idx ppc32::misc-data-offset)
607 (lwz aidx init-a vsp)
608 (addi aidx aidx ppc32::misc-data-offset)
609 (li count 0)
610 ; initialize carry 0
611 (addc x rzero rzero)
612 @loop
613 (lwzx x aptr aidx)
614 (lwzx y bptr idx)
615 (adde x x y)
616 (stwx x aptr aidx)
617 (addi count count '1)
618 (cmpw count length)
619 (addi idx idx '1)
620 (addi aidx aidx '1)
621 (blt @loop)
622 (lwzx x aptr aidx) ; add carry into next one
623 (adde x x rzero)
624 (stwx x aptr aidx)
625 (la vsp 4 vsp)
626 (blr)))
627
628
629
630(defppclapfunction bignum-negate-loop-really ((big arg_x) (len arg_y) (result arg_z))
631 (let ((idx imm0)
632 (one imm1)
633 (x imm2))
634 (li idx ppc32::misc-data-offset)
635 (li one '1)
636 ; initialize carry 1
637 (li x -1)
638 (addic x x 1)
639 @loop
640 ;(addi count count '1)
641 ;(cmpw count len)
642 (subf. len one len)
643 (lwzx x big idx)
644 (not x x)
645 (adde x x rzero)
646 (stwx x result idx)
647 (addi idx idx '1)
648 (bgt @loop)
649 ; return carry
650 (li x 0)
651 (adde x x rzero)
652 (box-fixnum arg_z x)
653 (blr)))
654
655(defppclapfunction bignum-negate-to-pointer ((big arg_x) (len arg_y) (result arg_z))
656 (let ((idx imm0)
657 (one imm1)
658 (x imm2)
659 (oidx imm3)
660 (ptr imm4))
661 (li idx ppc32::misc-data-offset)
662 (li oidx 0)
663 (macptr-ptr ptr result)
664 (li one '1)
665 ; initialize carry 1
666 (li x -1)
667 (addic x x 1)
668 @loop
669 ;(addi count count '1)
670 ;(cmpw count len)
671 (subf. len one len)
672 (lwzx x big idx)
673 (not x x)
674 (adde x x rzero)
675 (stwx x ptr oidx)
676 (addi idx idx '1)
677 (addi oidx oidx 4)
678 (bgt @loop)
679 ; return carry
680 (li x 0)
681 (adde x x rzero)
682 (box-fixnum arg_z x)
683 (blr)))
684
685;; she do tolerate len = jidx
686(defppclapfunction bignum-shift-left-loop ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (jidx arg_z))
687 (let ((y imm0)
688 (idx imm1)
689 (bits imm2)
690 (rbits imm3)
691 (x imm4)
692 (iidx temp0)
693 (resptr temp1))
694 (li iidx 0)
695 (lwz bits nbits vsp)
696 (lwz resptr result vsp)
697 (unbox-fixnum bits bits)
698 (subfic rbits bits 32)
699 ;(dbg)
700 (lwz imm4 ppc32::misc-data-offset bignum)
701 (slw imm4 imm4 bits)
702 (la y (+ ppc32::misc-data-offset -4) jidx)
703 (stwx imm4 y resptr)
704
705 (cmpw len jidx)
706 (beq @done)
707 @loop
708 (addi idx iidx ppc32::misc-data-offset)
709 (lwzx x bignum idx)
710 (srw x x rbits)
711 (addi idx idx '1)
712 (lwzx y bignum idx)
713 (slw y y bits)
714 (or x x y)
715 (addi idx jidx ppc32::misc-data-offset)
716 (stwx x resptr idx)
717 (addi jidx jidx '1)
718 (cmpw jidx len)
719 (addi iidx iidx '1)
720 (blt @loop)
721 @done
722 ; do first - lo order
723
724 ; do last - hi order
725 (addi idx iidx ppc32::misc-data-offset)
726 ;(dbg t)
727 (lwzx y bignum idx)
728 (sraw y y rbits)
729 (addi idx len ppc32::misc-data-offset)
730 (stwx y resptr idx)
731 (la vsp 8 vsp)
732 (blr)))
733
734
735
736(defppclapfunction bignum-shift-right-loop-1 ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (iidx arg_z))
737 (let ((y imm0)
738 (idx imm1)
739 (bits imm2)
740 (rbits imm3)
741 (x imm4)
742 (jidx temp0)
743 (resptr temp1))
744 (li jidx 0)
745 (lwz bits nbits vsp)
746 (lwz resptr result vsp)
747 (unbox-fixnum bits bits)
748 (cmpw jidx len)
749 (subfic rbits bits 32)
750 (bge @done)
751 @loop
752 (addi idx iidx ppc32::misc-data-offset)
753 (lwzx x bignum idx)
754 (srw x x bits)
755 (addi idx idx '1)
756 (lwzx y bignum idx)
757 (slw y y rbits)
758 (or x x y)
759 (addi idx jidx ppc32::misc-data-offset)
760 (stwx x resptr idx)
761 (addi jidx jidx '1)
762 (cmpw jidx len)
763 (addi iidx iidx '1)
764 (blt @loop)
765 @done
766 (addi idx iidx ppc32::misc-data-offset)
767 (lwzx x bignum idx)
768 (sraw x x bits)
769 (addi idx jidx ppc32::misc-data-offset)
770 (stwx x resptr idx)
771 (la vsp 8 vsp)
772 (blr)))
773
774
775(defppclapfunction %compare-digits ((a arg_x) (b arg_y) (idx arg_z))
776 (la imm0 ppc32::misc-data-offset idx)
777 (lwzx imm1 a imm0)
778 (lwzx imm0 b imm0)
779 (cmplw imm1 imm0)
780 (li arg_z '0)
781 (beqlr)
782 (li arg_z '1)
783 (bgtlr)
784 (li arg_z '-1)
785 (blr))
786
787
788
789;; returns number of bits in digit-hi,digit-lo that are sign bits
790;; 32 - digits-sign-bits is integer-length
791
792(defppclapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
793 (rlwinm. imm1 hi (- 16 ppc32::fixnumshift) 0 15)
794 (rlwimi imm1 lo (- 32 ppc32::fixnumshift) 16 31)
795 (not imm1 imm1)
796 (blt @wasneg)
797 (not imm1 imm1)
798 @wasneg
799 (cntlzw imm1 imm1)
800 (box-fixnum arg_z imm1)
801 (blr))
802
803(defppclapfunction bignum-logtest-loop ((count arg_x) (s1 arg_y) (s2 arg_z))
804 (addi imm1 rzero ppc32::misc-data-offset)
805 @loop
806 (lwzx imm2 s1 imm1)
807 (lwzx imm3 s2 imm1)
808 (and. imm2 imm3 imm2)
809 (addi imm1 imm1 4)
810 (bne @true)
811 (subic. count count 4)
812 (bgt @loop)
813 (li arg_z (target-nil-value))
814 (blr)
815 @true
816 (li arg_z (+ (target-nil-value) ppc32::t-offset))
817 (blr))
818
819;;; dest[idx] <- (lognot src[idx])
820(defppclapfunction %bignum-lognot ((idx arg_x) (src arg_y) (dest arg_z))
821 (la imm1 ppc32::misc-data-offset idx)
822 (lwzx imm0 src imm1)
823 (not imm0 imm0)
824 (stwx imm0 dest imm1)
825 (blr))
826
827;;; dest[idx] <- (logand x[idx] y[idx])
828(defppclapfunction %bignum-logand ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
829 (vpop temp0)
830 (la imm1 ppc32::misc-data-offset temp0)
831 (lwzx imm0 x imm1)
832 (lwzx imm2 y imm1)
833 (and imm0 imm0 imm2)
834 (stwx imm0 dest imm1)
835 (blr))
836
837;;; dest[idx] <- (logandc2 x[idx] y[idx])
838(defppclapfunction %bignum-logandc2 ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
839 (vpop temp0)
840 (la imm1 ppc32::misc-data-offset temp0)
841 (lwzx imm0 x imm1)
842 (lwzx imm2 y imm1)
843 (andc imm0 imm0 imm2)
844 (stwx imm0 dest imm1)
845 (blr))
846
847;;; dest[idx] <- (logandc1 x[idx] y[idx])
848(defppclapfunction %bignum-logandc1 ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
849 (vpop temp0)
850 (la imm1 ppc32::misc-data-offset temp0)
851 (lwzx imm0 x imm1)
852 (lwzx imm2 y imm1)
853 (andc imm0 imm2 imm0)
854 (stwx imm0 dest imm1)
855 (blr))
856
857
858
859(defppclapfunction digit-lognot-move ((index arg_x) (source arg_y) (dest arg_z))
860 (let ((scaled-index imm1))
861 (vref32 imm0 source index scaled-index) ; imm1 has c(index) + data-offset
862 (not imm0 imm0)
863 (stwx imm0 dest scaled-index)
864 (blr)))
865
866; if dest not nil store unboxed result in dest(0), else return boxed result
867(defppclapfunction fix-digit-logandc2 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
868 (let ((w1 imm0)
869 (w2 imm1))
870 (unbox-fixnum w1 fix)
871 (lwz w2 ppc32::misc-data-offset big)
872 (cmpwi dest (target-nil-value))
873 (not w2 w2)
874 (and w1 w1 w2)
875 (bne @store)
876 (box-fixnum arg_z w1)
877 (blr)
878 @store
879 (stw w1 ppc32::misc-data-offset dest)
880 (blr)))
881
882
883
884(defppclapfunction fix-digit-logand ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
885 (let ((w1 imm0)
886 (w2 imm1))
887 (unbox-fixnum w1 fix)
888 (lwz w2 ppc32::misc-data-offset big)
889 (cmpwi dest (target-nil-value))
890 (and w1 w1 w2)
891 (bne @store)
892 (box-fixnum arg_z w1)
893 (blr)
894 @store
895 (stw w1 ppc32::misc-data-offset dest)
896 (blr)))
897
898
899
900(defppclapfunction fix-digit-logandc1 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
901 (let ((w1 imm0)
902 (w2 imm1))
903 (unbox-fixnum w1 fix)
904 (lwz w2 ppc32::misc-data-offset big)
905 (cmpwi dest (target-nil-value))
906 (not w1 w1)
907 (and w1 w1 w2)
908 (bne @store)
909 (box-fixnum arg_z w1)
910 (blr)
911 @store
912 (stw w1 ppc32::misc-data-offset dest)
913 (blr)))
914
915;;; dest[idx] <- (logior x[idx] y[idx])
916(defppclapfunction %bignum-logior ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
917 (vpop temp0)
918 (la imm1 ppc32::misc-data-offset temp0)
919 (lwzx imm0 x imm1)
920 (lwzx imm2 y imm1)
921 (or imm0 imm0 imm2)
922 (stwx imm0 dest imm1)
923 (blr))
924
925;;; dest[idx] <- (logxor x[idx] y[idx])
926(defppclapfunction %bignum-logxor ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
927 (vpop temp0)
928 (la imm1 ppc32::misc-data-offset temp0)
929 (lwzx imm0 x imm1)
930 (lwzx imm2 y imm1)
931 (xor imm0 imm0 imm2)
932 (stwx imm0 dest imm1)
933 (blr))
934
935
936
937(defppclapfunction bignum-xor-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
938 (lwz imm0 count vsp)
939 (addi imm1 rzero ppc32::misc-data-offset)
940 @loop
941 (lwzx imm2 s1 imm1)
942 (lwzx imm3 s2 imm1)
943 (xor imm2 imm3 imm2)
944 (subic. imm0 imm0 4)
945 (stwx imm2 dest imm1)
946 (addi imm1 imm1 4)
947 (bgt @loop)
948 @out
949 (la vsp 4 vsp)
950 (blr))
951
952#+nomore
953(defppclapfunction try-guess-loop-1 ((guess-h 8)(guess-l 4)(len-y 0)
954 (xidx arg_x) (xptr arg_y) (yptr arg_z))
955 (let ((guess imm0)
956 (carry imm1)
957 (y imm2)
958 (x imm2)
959 (prod-l imm3)
960 (prod-h imm4)
961 (tem imm4)
962 (yidx temp0)
963 (end-y temp1)
964 (carry-bit temp2))
965 (lwz x guess-h vsp)
966 (lwz tem guess-l vsp)
967 (compose-digit guess x tem)
968 (lwz end-y len-y vsp)
969 (li yidx 0)
970 (li carry 0)
971 (li carry-bit '1)
972 @loop
973 ; multiply guess by ydigit, add carry to lo, hi is new carry
974 ; then get an xdigit subtract prod-lo from it and store result in x (remember carry)
975 (addi tem yidx ppc32::misc-data-offset) ; get yidx
976 (lwzx y yptr tem)
977 (mullw prod-l guess y)
978 (mulhwu prod-h guess y)
979 (addc prod-l prod-l carry)
980 (adde carry prod-h rzero)
981 ; get back saved carry
982 (li tem '-1)
983 (addc tem carry-bit tem)
984 (addi tem xidx ppc32::misc-data-offset)
985 (lwzx x xptr tem)
986 (subfe x prod-l x)
987 (stwx x xptr tem)
988 ; save carry
989 (adde prod-l rzero rzero)
990 (box-fixnum carry-bit prod-l)
991 (addi yidx yidx '1)
992 (cmpw yidx end-y)
993 (addi xidx xidx '1)
994 (blt @loop)
995 ; finally subtract carry from last x digit
996 @done
997 (li prod-l '-1) ; get back saved carry again - box clobbered it?
998 (addc prod-l carry-bit prod-l)
999 (addi tem xidx ppc32::misc-data-offset) ; maybe still there - nope
1000 (lwzx x xptr tem)
1001 (subfe x carry x)
1002 (stwx x xptr tem)
1003 (la vsp 12 vsp)
1004 (blr)))
1005
1006;; x0 is at index, x1 at index-1, x2 at index-2
1007;; y1 is at index, y2 at index-1
1008;; this doesnt help much
1009(defppclapfunction truncate-guess-loop ((guess-h 8)(guess-l 4)(x 0)
1010 (xidx arg_x)(yptr arg_y) (yidx arg_z))
1011 (let ((guess imm0)
1012 (y1 imm1)
1013 (y2 imm1)
1014 (gy1-lo imm2) ; look out below
1015 (gy1-hi imm2)
1016 (gy2-lo imm2)
1017 (gy2-hi imm2)
1018 (xptr temp0)
1019 (m imm3)
1020 (tem imm4)
1021 (y1-idx 28)
1022 (y2-idx 24)
1023 (x0-idx 20)
1024 (x1-idx 16)
1025 (x2-idx 12))
1026 (stwu tsp -32 tsp)
1027 (stw tsp 4 tsp)
1028 (lwz y1 guess-h vsp)
1029 (lwz tem guess-l vsp)
1030 (compose-digit guess y1 tem)
1031 (addi tem yidx ppc32::misc-data-offset)
1032 (lwzx y1 yptr tem)
1033 (stw y1 y1-idx tsp)
1034 (subi tem tem 4)
1035 (lwzx y2 yptr tem)
1036 (stw y2 y2-idx tsp)
1037 (lwz xptr x vsp)
1038 (addi tem xidx ppc32::misc-data-offset)
1039 (lwzx y1 xptr tem) ; its x0
1040 (stw y1 x0-idx tsp)
1041 (subi tem tem 4)
1042 (lwzx y1 xptr tem)
1043 (stw y1 x1-idx tsp)
1044 (subi tem tem 4)
1045 (lwzx y1 xptr tem)
1046 (stw y1 x2-idx tsp)
1047 @loop
1048 (lwz y1 y1-idx tsp) ; get y1
1049 (mullw gy1-lo guess y1)
1050 (lwz m x1-idx tsp) ; get x1
1051 (subc m m gy1-lo) ; x1 - gy1-lo => m
1052 (mulhwu gy1-hi guess y1)
1053 (lwz tem x0-idx tsp) ; get x0
1054 (subfe. tem gy1-hi tem) ; - val not used just cr
1055 (lwz y2 y2-idx tsp) ; get y2
1056 (mulhwu gy2-hi guess y2) ; does it pay to do this now even tho may not need?
1057 (bne @done)
1058 (cmpl :cr0 gy2-hi m) ; if > or = and foo then more - L means logical means unsigned
1059 (blt @done) ; if < done
1060 (bne @more) ; if = test lo
1061 (mullw gy2-lo guess y2)
1062 (lwz tem x2-idx tsp) ; get x2
1063 (cmpl :cr0 gy2-lo tem)
1064 (ble @done)
1065 @more
1066 (subi guess guess 1)
1067 (b @loop)
1068 @done
1069 (digit-h temp0 guess)
1070 (vpush temp0)
1071 (digit-l temp0 guess)
1072 (vpush temp0)
1073 (la temp0 20 vsp)
1074 (lwz tsp 0 tsp)
1075 (set-nargs 2)
1076 (ba .spvalues)))
1077
1078(defppclapfunction normalize-bignum-loop ((sign arg_x)(res arg_y)(len arg_z))
1079 (let ((idx imm0)
1080 (usign imm1)
1081 (val imm2))
1082 (unbox-fixnum usign sign)
1083 (cmpwi len 0)
1084 (addi idx len (- ppc32::misc-data-offset 4))
1085 (beqlr) ; huh - can this ever happen?
1086 @loop
1087 (lwzx val res idx)
1088 (cmpw val usign)
1089 (subi idx idx '1)
1090 (bne @neq)
1091 (subic. len len '1)
1092 (bgt @loop)
1093 ; fall through - its all sign - return 1
1094 (li arg_z '1)
1095 (blr)
1096 @neq
1097 (rlwinm usign usign 0 0 0) ; hi bit
1098 (rlwinm val val 0 0 0)
1099 (cmpw usign val) ; is hi bit = sign, if so then done
1100 (beqlr)
1101 (addi len len '1) ; if not, need 1 more
1102 (blr)))
1103
1104(defppclapfunction %normalize-bignum-2 ((fixp arg_y)(res arg_z))
1105 (let ((idx imm0)
1106 (usign imm1)
1107 (val imm2)
1108 (len arg_x)
1109 (oldlen temp0))
1110 (lwz imm4 (- ppc32::fulltag-misc) res)
1111 (header-length len imm4)
1112 (cmpwi len 0)
1113 (mr oldlen len)
1114 (addi idx len (- ppc32::misc-data-offset 4))
1115 (beqlr) ; huh - can this ever happen?
1116 (lwzx val res idx) ; high order word
1117 (srawi usign val 31) ; get sign
1118 @loop
1119 (lwzx val res idx)
1120 (cmpw val usign)
1121 (subi idx idx '1)
1122 (bne @neq)
1123 (subic. len len '1)
1124 (bgt @loop)
1125 ; fall through - its all sign - return 1
1126 (li len '1)
1127 (rlwinm usign usign 0 0 0) ; hi bit
1128 (b @more)
1129 @neq
1130 (rlwinm usign usign 0 0 0) ; hi bit
1131 (rlwinm val val 0 0 0)
1132 (cmpw usign val) ; is hi bit = sign, if so then done
1133 (beq @more)
1134 (addi len len '1) ; if not, need 1 more
1135 (b @big)
1136 @more
1137 (cmpwi :cr1 fixp (target-nil-value))
1138 (cmpwi len '1)
1139 (beq :cr1 @big) ; dont return fixnum
1140 (bgt @big)
1141 ;; stuff for maybe fixnum
1142 ;(dbg t)
1143 (lwz val ppc32::misc-data-offset res)
1144 (rlwinm imm4 val 0 0 2) ; hi 3 bits same? - we assume fixnumshift is 2
1145 (srawi usign usign 2)
1146 (cmpw usign imm4)
1147 (bne @big)
1148 (box-fixnum arg_z val)
1149 (blr)
1150 @big
1151 (cmpw oldlen len)
1152 (beqlr) ; same length - done
1153 (li imm4 ppc32::subtag-bignum) ; set new length
1154 (rlwimi imm4 len (- ppc32::num-subtag-bits ppc32::fixnumshift) 0 (- 31 ppc32::num-subtag-bits))
1155 (stw imm4 ppc32::misc-header-offset res)
1156 ; 0 to tail if negative
1157 (cmpwi usign 0)
1158 (beqlr)
1159 ; zero from len inclusive to oldlen exclusive
1160 ;(dbg t)
1161 (addi idx len ppc32::misc-data-offset)
1162 @loop2
1163 (stwx rzero idx res)
1164 (addi len len '1)
1165 (cmpw len oldlen)
1166 (addi idx idx '1)
1167 (blt @loop2)
1168 (blr)))
1169
1170(defppclapfunction %count-digit-leading-zeros ((high arg_y) (low arg_z))
1171 (compose-digit imm0 high low)
1172 (cntlzw imm0 imm0)
1173 (box-fixnum arg_z imm0)
1174 (blr))
1175
1176(defppclapfunction %count-digit-trailing-zeros ((high arg_y) (low arg_z))
1177 (compose-digit imm0 high low)
1178 (neg imm1 imm0)
1179 (and imm0 imm0 imm1)
1180 (cntlzw imm0 imm0)
1181 (subfic imm0 imm0 31)
1182 (box-fixnum arg_z imm0)
1183 (blr))
1184
1185
1186(defppclapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
1187 (let ((ndigits arg_x)
1188 (nbits arg_y)
1189 (digit imm0)
1190 (ptr imm1))
1191 (li ptr ppc32::misc-data-offset)
1192 (li ndigits '-32)
1193 @next
1194 (lwzx digit bignum ptr)
1195 (cmpwi digit 0)
1196 (la ptr 4 ptr)
1197 (addi ndigits ndigits '32)
1198 (beq @next)
1199 (neg ptr digit)
1200 (and digit digit ptr)
1201 (cntlzw digit digit)
1202 (subfic digit digit 31)
1203 (box-fixnum nbits digit)
1204 (add arg_z nbits ndigits)
1205 (blr)))
1206
1207
1208(defppclapfunction %bignum-trim-leading-zeros ((bignum arg_x)
1209 (start arg_y)
1210 (len arg_z))
1211 (add imm1 start len)
1212 (la imm1 (- ppc32::misc-data-offset 4) imm1)
1213 @loop
1214 (cmpwi cr0 len '1)
1215 (lwzx imm0 bignum imm1)
1216 (cmpwi cr1 imm0 0)
1217 (la imm1 -4 imm1)
1218 (bnelr cr1)
1219 (la len '-1 len)
1220 (bne @loop)
1221 (blr))
1222
1223;;; Set length of bignum to new-len (zeroing out any trailing words between
1224;;; the old length and the new.
1225(defppclapfunction %shrink-bignum ((new-len arg_y) (bignum arg_z))
1226 (let ((old-len temp0)
1227 (old-idx imm0)
1228 (new-idx imm2)
1229 (header imm1))
1230 (getvheader header bignum)
1231 (header-length old-len header)
1232 (cmpw old-len new-len)
1233 (la old-idx ppc32::misc-data-offset old-len)
1234 (la new-idx ppc32::misc-data-offset new-len)
1235 (beqlr)
1236 @loop
1237 (subi old-idx old-idx 4)
1238 (cmpw old-idx new-idx)
1239 (stwx ppc32::rzero bignum old-idx)
1240 (bne @loop)
1241 (slwi header new-len (- ppc32::num-subtag-bits ppc32::fixnumshift))
1242 (ori header header ppc32::subtag-bignum)
1243 (stw header ppc32::misc-header-offset bignum)
1244 (blr)))
1245
1246;;; Especially when large operands are involved, the GNU Multiple Precision
1247;;; library's algorithm's are often faster than Clozure CL's. GMP's MPN
1248;;; library defines operations on "limb vectors", which are basically
1249;;; just sequences of 32-bit digits (least-significant digit first), which
1250;;; is just about exactly the same way that Clozure CL stores bignums.
1251;;; We might want to (eventually) link some or all of GMP into Clozure CL;
1252;;; in the meantime, it seems that we get some performance benefit from
1253;;; using GMP representation and algorithms in some mixture of LAP and Lisp.
1254;;; To approximate the "limb vector" representation, we copy operands to
1255;;; (and results from) stack-allocated macptrs. Since those macptrs are
1256;;; word-aligned, we can use fixnums to represent word-aligned pointers.
1257;;; Obviously, it costs a little to copy back and forth like this; we
1258;;; only win when operands are fairly large, and when we can replace an
1259;;; N^2 algorithm with something cheaper.
1260
1261;;; Macptr MUST be word-aligned (low 2 bits must be 0). Extract
1262;;; such an address, return it as a fixnum.
1263(defppclapfunction macptr->fixnum ((ptr arg_z))
1264 (macptr-ptr arg_z ptr)
1265 (blr))
1266
1267;;; Copy the limb SRC points to to where DEST points.
1268(defppclapfunction copy-limb ((src arg_y) (dest arg_z))
1269 (lwz imm0 0 src)
1270 (stw imm0 0 dest)
1271 (blr))
1272
1273;;; Return T iff LIMB contains 0.
1274(defppclapfunction limb-zerop ((limb arg_z))
1275 (lwz imm0 0 limb)
1276 (cntlzw imm0 imm0)
1277 (srwi imm0 imm0 5)
1278 (bit0->boolean arg_z imm0 imm0)
1279 (blr))
1280
1281;;; Return -1,0,1 according to whether the contents of Y are
1282;;; <,=,> the contents of Z.
1283(defppclapfunction compare-limbs ((y arg_y) (z arg_z))
1284 (lwz imm1 0 z)
1285 (lwz imm0 0 y)
1286 (cmplw imm0 imm1)
1287 (li arg_z 0)
1288 (beqlr)
1289 (li arg_z '1)
1290 (bgtlr)
1291 (li arg_z '-1)
1292 (blr))
1293
1294;;; Add a fixnum to the limb LIMB points to. Ignore overflow.
1295(defppclapfunction add-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1296 (unbox-fixnum imm0 fixnum)
1297 (lwz imm1 0 limb)
1298 (add imm1 imm1 imm0)
1299 (stw imm1 0 limb)
1300 (blr))
1301
1302;;; Store a fixnum value where LIMB points.
1303(defppclapfunction copy-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
1304 (unbox-fixnum imm0 fixnum)
1305 (stwu imm0 0 limb)
1306 (blr))
1307
1308;;; Increment a "LIMB VECTOR" (bignum) by a small amount. The caller
1309;;; knows that carries will only propagate for a word or two.
1310(defppclapfunction mpn-incr-u ((limb arg_y) (fixby arg_z))
1311 (let ((by imm0)
1312 (sum imm1))
1313 (unbox-fixnum by fixby)
1314 @loop
1315 (lwz sum 0 limb)
1316 (add sum sum by)
1317 (cmplw sum by)
1318 (stw sum 0 limb)
1319 (li by 1)
1320 (la limb 4 limb)
1321 (blt @loop)
1322 (blr)))
1323
1324;;; Store XP-YP at WP; return carry (0 or 1).
1325;;; wp, xp, yp: word-aligned, unboxed ptrs (fixnums)
1326;;; size: boxed fixnum
1327;;; returns boxed carry
1328(defppclapfunction mpn-sub-n ((wp 0) (xp arg_x) (yp arg_y) (size arg_z))
1329 (vpop imm0)
1330 (subi size size '1)
1331 (cmpwi size 0)
1332 (lwz imm3 0 xp)
1333 (lwz imm4 0 yp)
1334 (sub imm1 xp imm0) ; imm1 = xp-wp
1335 (sub imm2 yp imm0) ; imm2 = yp-wp
1336 (addi imm1 imm1 4) ; imm1 = xp-wp+4
1337 (addi imm2 imm2 4) ; imm2 = yp-wp+4
1338 (subfc imm3 imm4 imm3)
1339 (stw imm3 0 imm0) ; wp[0]
1340 (beq @done)
1341 @top
1342 (subi size size '1)
1343 (cmpwi size 0)
1344 (lwzx imm3 imm1 imm0) ; imm3 = xp[i]
1345 (lwzx imm4 imm2 imm0) ; imm4 = xp[i]
1346 (subfe imm3 imm4 imm3)
1347 (stwu imm3 4 imm0)
1348 (bne @top)
1349 @done
1350 (subfe imm0 rzero rzero)
1351 (subfic imm0 imm0 0)
1352 (box-fixnum arg_z imm0)
1353 (blr))
1354
1355;;; Store XP+YP at WP; return carry (0 or 1).
1356;;; wp, xp, yp = word-aligned, unboxed macptrs (fixnums).
1357;;; size = boxed fixnum
1358;;; result = boxed carry
1359(defppclapfunction mpn-add-n ((wp 0) (xp arg_x) (yp arg_y) (size arg_z))
1360 (vpop imm0)
1361 (subi size size '1)
1362 (cmpwi size 0)
1363 (lwz imm3 0 xp)
1364 (lwz imm4 0 yp)
1365 (sub imm1 xp imm0) ; imm1 = xp-wp
1366 (sub imm2 yp imm0) ; imm2 = yp-wp
1367 (addi imm1 imm1 4) ; imm1 = xp-wp+4
1368 (addi imm2 imm2 4) ; imm2 = yp-wp+4
1369 (addc imm3 imm3 imm4)
1370 (stw imm3 0 imm0) ; wp[0]
1371 (beq @done)
1372 @top
1373 (subi size size '1)
1374 (cmpwi size 0)
1375 (lwzx imm3 imm1 imm0) ; imm3 = xp[i]
1376 (lwzx imm4 imm2 imm0) ; imm4 = xp[i]
1377 (adde imm3 imm4 imm3)
1378 (stwu imm3 4 imm0)
1379 (bne @top)
1380 @done
1381 (addze imm0 rzero)
1382 (box-fixnum arg_z imm0)
1383 (blr))
1384
1385;;; Add the single limb LIMB to S1P (propagating carry.) Store the
1386;;; result at RP. RP and S1P may be the same place, so check for
1387;;; that and do nothing after carry stops propagating. Return carry.
1388(defppclapfunction mpn-add-1 ((rp-offset 0) (s1p arg_x) (size arg_y) (limb arg_z))
1389 (let ((rp temp0))
1390 (vpop rp)
1391 (subi size size '1)
1392 (cmpwi cr2 size 0)
1393 (cmpw cr1 rp s1p) ;a common case
1394 (subi rp rp 4)
1395 (subi s1p s1p 4)
1396 (lwz imm0 0 limb)
1397 (lwzu imm1 4 s1p)
1398 (addc imm1 imm1 imm0)
1399 (addze. imm0 rzero)
1400 (stwu imm1 4 rp)
1401 (beq cr2 @done)
1402 @top
1403 (beq cr0 @finish) ; branch if no more carry
1404 (subi size size '1)
1405 (cmpwi cr2 size 0)
1406 (lwzu imm1 4 s1p)
1407 (addc imm1 imm1 imm0)
1408 (addze. imm0 rzero)
1409 (stwu imm1 4 rp)
1410 (bne cr2 @top)
1411 (box-fixnum arg_z imm0)
1412 (blr)
1413 @finish
1414 (beq cr1 @done)
1415 @loop
1416 (subi size size '1)
1417 (cmpwi cr2 size 0)
1418 (lwzu imm1 4 s1p)
1419 (stwu imm1 4 rp)
1420 (bne cr2 @loop)
1421 @done
1422 (box-fixnum arg_z imm0)
1423 (blr)))
1424;;; Multiply the limb vector S1 by the single limb at LIMBPTR, storing
1425;;; the result at RES. Store the "carry out" (high word of last 64-bit
1426;;; partial product) at the limb RESULT.
1427;;; res, s1, limbptr, result:
1428;;; unboxed, word-aligned ptrs (fixnums). size: boxed fixnum
1429;;; It'd be hard to transliterate the GMP code here; the GMP version
1430;;; uses lots more immediate registers than we can easily use in LAP
1431;;; (and is much more aggressively pipelined).
1432(defppclapfunction mpn-mul-1 ((res-offset 4)
1433 (s1-offset 0)
1434 (size arg_x)
1435 (limbptr arg_y)
1436 (result arg_z))
1437 (let ((limb imm0)
1438 (resptr temp0)
1439 (s1 temp1)
1440 (src imm1)
1441 (prod-low imm2)
1442 (prod-high imm3)
1443 (carry imm4))
1444 (lwz resptr res-offset vsp)
1445 (lwz s1 s1-offset vsp)
1446 (la vsp 8 vsp)
1447 (la resptr -4 resptr) ; pre-decrement
1448 (la s1 -4 s1)
1449 (addic carry carry 0)
1450 (li carry 0)
1451 (lwz limb 0 limbptr)
1452 @loop
1453 (subi size size '1)
1454 (cmpwi size 0)
1455 (lwzu src 4 s1)
1456 (mulhwu prod-high src limb)
1457 (mullw prod-low src limb)
1458 (addc prod-low prod-low carry)
1459 (addze carry prod-high)
1460 (stwu prod-low 4 resptr)
1461 (bne @loop)
1462 (stw carry 0 result)
1463 (blr)))
1464
1465;;; multiply s1*limb and add result to res
1466;;; res, s1, limbptr, result:
1467;;; unboxed, word-aligned ptrs (fixnums).
1468;;; size: boxed fixnum
1469;;; limbptr: source "limb".
1470;;; result: carry out (high word of product).
1471(defppclapfunction mpn-addmul-1 ((res-offset 4)
1472 (s1-offset 0)
1473 (size arg_x)
1474 (limbptr arg_y)
1475 (result arg_z))
1476 (let ((limb imm0)
1477 (resptr temp0)
1478 (s1 temp1)
1479 (src imm1)
1480 (prod-low imm2)
1481 (prod-high imm3)
1482 (carry imm4)
1483 (prev imm4))
1484 (lwz resptr res-offset vsp)
1485 (lwz s1 s1-offset vsp)
1486 (la vsp 8 vsp)
1487 (la resptr -4 resptr) ; pre-decrement
1488 (la s1 -4 s1)
1489 (addic carry carry 0)
1490 (li carry 0)
1491 (lwz limb 0 limbptr)
1492 @loop
1493 (subi size size '1)
1494 (cmpwi size 0)
1495 (lwzu src 4 s1)
1496 (mulhwu prod-high src limb)
1497 (mullw prod-low src limb)
1498 (addc prod-low prod-low carry)
1499 (addze prod-high prod-high)
1500 (lwz prev 4 resptr)
1501 (addc prev prev prod-low)
1502 (stwu prev 4 resptr)
1503 (addze carry prod-high)
1504 (bne @loop)
1505 (stw carry 0 result)
1506 (blr)))
1507
1508;;; Multiply the UN-word limb vector at UP and the VN-word limb vector
1509;;; at VP, store the result at RP.
1510(defppclapfunction mpn-mul-basecase ((rp-offset 4)
1511 (up-offset 0)
1512 (un arg_x)
1513 (vp arg_y)
1514 (vn arg_z))
1515 (let ((resptr temp0)
1516 (s1 temp1)
1517 (up temp2)
1518 (rp temp3)
1519 (size nargs)
1520 (limb imm0)
1521 (src imm1)
1522 (prod-low imm2)
1523 (prod-high imm3)
1524 (prev imm4)
1525 (carry imm4))
1526 (lwz resptr rp-offset vsp)
1527 (la rp -4 resptr)
1528 (lwz up up-offset vsp)
1529 (la s1 -4 up)
1530 (la vsp 8 vsp)
1531 (mr size un)
1532 (lwz limb 0 vp)
1533 (subi vn vn '1)
1534 (cmpwi cr2 vn 0)
1535 (li carry 0)
1536 @mul-1-loop
1537 (subi size size '1)
1538 (cmpwi size 0)
1539 (lwzu src 4 s1)
1540 (mulhwu prod-high src limb)
1541 (mullw prod-low src limb)
1542 (addc prod-low prod-low carry)
1543 (addze carry prod-high)
1544 (stwu prod-low 4 rp)
1545 (bne @mul-1-loop)
1546 (stw carry 4 rp)
1547 @again
1548 (beq cr2 @done)
1549 (subi vn vn '1)
1550 (cmpwi cr2 vn 0)
1551 (mr rp resptr)
1552 (la resptr 4 resptr)
1553 (la s1 -4 up)
1554 (lwzu limb 4 vp)
1555 (mr size un)
1556 (addic carry carry 0)
1557 (li carry 0)
1558 @addmul-1-loop
1559 (subi size size '1)
1560 (cmpwi size 0)
1561 (lwzu src 4 s1)
1562 (mulhwu prod-high src limb)
1563 (mullw prod-low src limb)
1564 (addc prod-low prod-low carry)
1565 (addze prod-high prod-high)
1566 (lwz prev 4 rp)
1567 (addc prev prev prod-low)
1568 (stwu prev 4 rp)
1569 (addze carry prod-high)
1570 (bne @addmul-1-loop)
1571 (stw carry 4 rp)
1572 (b @again)
1573 @done
1574 (li arg_z (target-nil-value))
1575 (blr)))
1576
1577;;; left-shift src by 1 bit, storing result at res. Return
1578;;; the bit that was shifted out.
1579(defppclapfunction mpn-lshift-1 ((resptr arg_x) (s1ptr arg_y) (size-arg arg_z))
1580 (let ((size temp0)
1581 (last-bit imm0)
1582 (prev imm1)
1583 (curr imm2)
1584 (sleft imm3)
1585 (sright imm4))
1586 (subi size size-arg '1)
1587 (cmpwi size 0)
1588 (add resptr resptr size-arg)
1589 (add s1ptr s1ptr size-arg)
1590 (lwzu prev -4 s1ptr)
1591 (srwi last-bit prev 31)
1592 (box-fixnum arg_z last-bit)
1593 (beq @end1)
1594 @loop
1595 (subi size size '1)
1596 (cmpwi size 0)
1597 (lwzu curr -4 s1ptr)
1598 (slwi sleft prev 1)
1599 (srwi sright curr 31)
1600 (or sright sright sleft)
1601 (stwu sright -4 resptr)
1602 (beq @end2)
1603 (subi size size '1)
1604 (cmpwi size 0)
1605 (lwzu prev -4 s1ptr)
1606 (slwi sleft curr 1)
1607 (srwi sright prev 31)
1608 (or sright sright sleft)
1609 (stwu sright -4 resptr)
1610 (bne @loop)
1611 @end1
1612 (slwi sleft prev 1)
1613 (stwu sleft -4 resptr)
1614 (blr)
1615 @end2
1616 (slwi sleft curr 1)
1617 (stwu sleft -4 resptr)
1618 (blr)))
1619
1620;;; Do a 32x32=64 unsigned multiply of the words at X and Y. Store
1621;;; result (low word first) at RESULT.
1622(defppclapfunction umulppm ((x arg_x) (y arg_y) (result arg_z))
1623 (lwz imm0 0 x)
1624 (lwz imm1 0 y)
1625 (mullw imm2 imm0 imm1)
1626 (mulhwu imm3 imm0 imm1)
1627 (stw imm2 0 result)
1628 (stw imm3 4 result)
1629 (blr))
1630
1631
1632;;; for truncate-by-fixnum etal
1633;;; doesnt store quotient - just returns rem in 2 halves
1634(defppclapfunction %floor-loop-no-quo ((q arg_x)(yhi arg_y)(ylo arg_z))
1635 (let ((a imm1)
1636 (b imm2)
1637 (y imm3)
1638 (quo imm0)
1639 (qidx temp0)
1640 (qlen temp1))
1641 (lwz imm4 (- ppc32::fulltag-misc) q)
1642 (header-length qlen imm4)
1643 (subi qidx qlen 4)
1644 (mr b rzero)
1645 (compose-digit y yhi ylo)
1646 @loop
1647 (rlwinm a b -16 16 31)
1648 (rlwinm b b 16 0 15)
1649 (la imm4 ppc32::misc-data-offset q)
1650 (lwzx imm4 qidx imm4) ; q contents
1651 (rlwimi b imm4 16 16 31) ; hi 16 to lo b
1652 ;(dbg)
1653 (48x32-divide a b y fp0 fp1 fp2 imm4)
1654 (fctiwz fp0 fp0)
1655 (stwu tsp -32 tsp)
1656 (stw tsp 4 tsp)
1657 (stfd fp0 24 tsp)
1658 (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
1659 ; now mul quo by y
1660 (mullw imm4 y quo)
1661 ; and subtract from a,b
1662 (subfc b imm4 b)
1663 ; new a and b are low 2 digits of this (b) and last digit in array
1664 ; and do it again on low 3 digits
1665 ;(dbg)
1666 (rlwinm a b -16 16 31)
1667 (rlwinm b b 16 0 15)
1668 (la imm4 ppc32::misc-data-offset q)
1669 (lwzx imm4 qidx imm4)
1670 (rlwimi b imm4 0 16 31)
1671 (48x32-divide a b y fp0 fp1 fp2 imm4)
1672 (fctiwz fp0 fp0)
1673 (stfd fp0 16 tsp) ; quo lo
1674 (subi qidx qidx 4)
1675 (cmpwi :cr1 qidx 0)
1676 (lwz quo (+ 16 4) tsp)
1677 (lwz tsp 0 tsp)
1678 (mullw imm4 y quo)
1679 (subfc b imm4 b) ; b is remainder
1680 (bge :cr1 @loop)
1681 (digit-h temp0 b)
1682 (vpush temp0)
1683 (digit-l temp0 b)
1684 (vpush temp0)
1685 (la temp0 8 vsp)
1686 (set-nargs 2)
1687 (ba .SPvalues)))
1688
1689
1690; store result in dest, return rem in 2 halves
1691(defppclapfunction %floor-loop-quo ((q-stk 0)(dest arg_x)(yhi arg_y)(ylo arg_z))
1692 (let ((a imm1)
1693 (b imm2)
1694 (y imm3)
1695 (quo imm0)
1696 (qidx temp0)
1697 (qlen temp1)
1698 (q temp2))
1699 (vpop q)
1700 (lwz imm4 (- ppc32::fulltag-misc) q)
1701 (header-length qlen imm4)
1702 (subi qidx qlen 4)
1703 (mr b rzero)
1704 (compose-digit y yhi ylo)
1705 @loop
1706 (rlwinm a b -16 16 31)
1707 (rlwinm b b 16 0 15)
1708 (la imm4 ppc32::misc-data-offset q)
1709 (lwzx imm4 qidx imm4) ; q contents
1710 (rlwimi b imm4 16 16 31) ; hi 16 to lo b
1711 (48x32-divide a b y fp0 fp1 fp2 imm4)
1712 (fctiwz fp0 fp0)
1713 (stwu tsp -32 tsp)
1714 (stw tsp 4 tsp)
1715 (stfd fp0 24 tsp)
1716 (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
1717 ; now mul quo by y
1718 (mullw imm4 y quo)
1719 ; and subtract from a,b
1720 (subfc b imm4 b)
1721 ; new a and b are low 2 digits of this (b) and last digit in array
1722 ; and do it again on low 3 digits
1723 ;(dbg)
1724 (rlwinm a b -16 16 31)
1725 (rlwinm b b 16 0 15)
1726 (la imm4 ppc32::misc-data-offset q)
1727 (lwzx imm4 qidx imm4)
1728 (rlwimi b imm4 0 16 31)
1729 (48x32-divide a b y fp0 fp1 fp2 imm4)
1730 (fctiwz fp0 fp0)
1731 (stfd fp0 16 tsp) ; quo lo
1732 (lwz quo (+ 16 4) tsp)
1733 (mullw imm4 y quo)
1734 (subfc b imm4 b) ; b is remainder
1735 (lwz quo (+ 24 4) tsp) ; quo-hi
1736 (rlwinm quo quo 16 0 15)
1737 (lwz imm4 (+ 16 4) tsp) ; quo lo
1738 (lwz tsp 0 tsp)
1739 (rlwimi quo imm4 0 16 31)
1740 (la imm4 ppc32::misc-data-offset dest)
1741 (stwx quo qidx imm4)
1742 (subic. qidx qidx 4)
1743 (bge @loop)
1744 (digit-h temp0 b)
1745 (vpush temp0)
1746 (digit-l temp0 b)
1747 (vpush temp0)
1748 (la temp0 8 vsp)
1749 (set-nargs 2)
1750 (ba .SPvalues)))
1751
1752;;; get xidx thing from x, yidx thing from y if same return #xffff
1753;;; #xffff otherwise get another thing from x and 1- xidx and do as
1754;;; %floor of xthing otherx ything
1755;;; Huh?
1756(defppclapfunction %floor-99 ((x-stk 0)(xidx arg_x)(yptr arg_y)(yidx arg_z))
1757 (let ((xptr temp0)
1758 (a imm1)
1759 (b imm2)
1760 (y imm3)
1761 (quo imm0))
1762 (vpop xptr)
1763 (la imm4 ppc32::misc-data-offset XIDX)
1764 (lwzx a xptr imm4)
1765 (la imm4 ppc32::misc-data-offset YIDX)
1766 (lwzx y yptr imm4)
1767 (cmpw a y)
1768 (bne @more)
1769 (li imm4 #xffff)
1770 (rlwinm imm4 imm4 ppc32::fixnumshift (- 16 ppc32::fixnumshift) (- 31 ppc32::fixnum-shift))
1771 (vpush imm4)
1772 (vpush imm4)
1773 (la temp0 8 vsp)
1774 (set-nargs 2)
1775 (ba .spvalues)
1776 @MORE
1777 ; a has 16 bits from ahi, bhi gets alo blo gets bhi
1778 (la imm4 (- ppc32::misc-data-offset 4) xidx)
1779 (lwzx b xptr imm4)
1780 (rlwinm b b 16 16 31) ; bhi to blo
1781 (rlwimi b a 16 0 15) ; alo to bhi
1782 (rlwinm a a 16 16 31) ; a gets alo
1783 (48x32-divide a b y fp0 fp1 fp2 imm4)
1784 (fctiwz fp0 fp0)
1785 (stwu tsp -32 tsp)
1786 (stw tsp 4 tsp)
1787 (stfd fp0 24 tsp)
1788 (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
1789 ; now mul quo by y
1790 (mullw imm4 y quo)
1791 ; and subtract from a,b
1792 (subfc b imm4 b)
1793 ; AND AGAIN
1794 (rlwinm a b -16 16 31) ; a gets b hi
1795 (rlwinm b b 16 0 15) ; b lo to b hi
1796 (la imm4 (- ppc32::misc-data-offset 4) xidx)
1797 (lwzx imm4 imm4 xptr)
1798 (rlwimi b imm4 0 16 31)
1799 (48x32-divide a b y fp0 fp1 fp2 imm4)
1800 (fctiwz fp0 fp0)
1801 (stfd fp0 16 tsp) ; quo lo
1802 (lwz quo (+ 24 4) tsp) ; quo-hi
1803 (box-fixnum temp0 quo)
1804 (vpush temp0)
1805 (lwz quo (+ 16 4) tsp) ; quo lo
1806 (lwz tsp 0 tsp)
1807 (box-fixnum temp0 quo)
1808 (vpush temp0)
1809 (la temp0 8 vsp)
1810 (set-nargs 2)
1811 (ba .SPvalues)))
1812
1813; End of ppc32-bignum.lisp
Note: See TracBrowser for help on using the repository browser.