| 1 | ;;-*- Mode: Lisp; Package: CCL -*-
|
|---|
| 2 | ;;;
|
|---|
| 3 | ;;; Copyright 1994-2009 Clozure Associates
|
|---|
| 4 | ;;;
|
|---|
| 5 | ;;; Licensed under the Apache License, Version 2.0 (the "License");
|
|---|
| 6 | ;;; you may not use this file except in compliance with the License.
|
|---|
| 7 | ;;; You may obtain a copy of the License at
|
|---|
| 8 | ;;;
|
|---|
| 9 | ;;; http://www.apache.org/licenses/LICENSE-2.0
|
|---|
| 10 | ;;;
|
|---|
| 11 | ;;; Unless required by applicable law or agreed to in writing, software
|
|---|
| 12 | ;;; distributed under the License is distributed on an "AS IS" BASIS,
|
|---|
| 13 | ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|---|
| 14 | ;;; See the License for the specific language governing permissions and
|
|---|
| 15 | ;;; limitations under the License.
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 | (in-package "CCL")
|
|---|
| 19 |
|
|---|
| 20 | (eval-when (:compile-toplevel :execute)
|
|---|
| 21 | (require "PPC32-ARCH")
|
|---|
| 22 | (require "PPC-LAPMACROS")
|
|---|
| 23 |
|
|---|
| 24 | ;; Set RES to 1 if (u< x y), to 0 otherwise.
|
|---|
| 25 | (defppclapmacro sltu (res x y)
|
|---|
| 26 | `(progn
|
|---|
| 27 | (subfc ,res ,x ,y)
|
|---|
| 28 | (subfe ,res ,res ,res)
|
|---|
| 29 | (neg ,res ,res)))
|
|---|
| 30 |
|
|---|
| 31 | (defppclapmacro 48x32-divide (x-hi16 x-lo y freg temp-freg freg2 immx)
|
|---|
| 32 | `(let ((temp 16)
|
|---|
| 33 | (temp.h 16)
|
|---|
| 34 | (temp.l 20)
|
|---|
| 35 | (zero 8)
|
|---|
| 36 | (zero.h 8)
|
|---|
| 37 | (zero.l 12))
|
|---|
| 38 | (stwu tsp -24 tsp)
|
|---|
| 39 | (stw tsp 4 tsp)
|
|---|
| 40 | (lwi ,immx #x43300000) ; 1075 = 1022+53
|
|---|
| 41 | (stw ,immx zero.h tsp)
|
|---|
| 42 | (stw rzero zero.l tsp)
|
|---|
| 43 | (lfd ,temp-freg zero tsp)
|
|---|
| 44 | (rlwimi ,immx ,x-hi16 0 16 31)
|
|---|
| 45 | (stw ,immx temp.h tsp)
|
|---|
| 46 | (stw ,x-lo temp.l tsp)
|
|---|
| 47 | (lfd ,freg temp tsp)
|
|---|
| 48 |
|
|---|
| 49 | (fsub ,freg ,freg ,temp-freg)
|
|---|
| 50 | (lwi ,immx #x43300000)
|
|---|
| 51 | (stw ,immx temp.h tsp)
|
|---|
| 52 | (stw ,y temp.l tsp)
|
|---|
| 53 | (lfd ,freg2 temp tsp)
|
|---|
| 54 | (lwz tsp 0 tsp)
|
|---|
| 55 | (fsub ,freg2 ,freg2 ,temp-freg)
|
|---|
| 56 | (fdiv ,freg ,freg ,freg2)
|
|---|
| 57 | ))
|
|---|
| 58 |
|
|---|
| 59 | )
|
|---|
| 60 |
|
|---|
| 61 | ;;; %BIGNUM-REF needs to access bignums as obviously as possible, and it needs
|
|---|
| 62 | ;;; to be able to return 32 bits somewhere no one looks for real objects.
|
|---|
| 63 | ;;;
|
|---|
| 64 | ;;; The easiest thing to do is to store the 32 raw bits in two fixnums
|
|---|
| 65 | ;;; and return multiple values.
|
|---|
| 66 | (defppclapfunction %bignum-ref ((bignum arg_y) (i arg_z))
|
|---|
| 67 | (vref32 imm0 bignum i imm1)
|
|---|
| 68 | (digit-h temp0 imm0)
|
|---|
| 69 | (digit-l temp1 imm0)
|
|---|
| 70 | (vpush temp0)
|
|---|
| 71 | (vpush temp1)
|
|---|
| 72 | (la temp0 8 vsp) ; ?? why not (mr temp0 vsp) before vpushing?
|
|---|
| 73 | (set-nargs 2) ; that doesn't make any difference. And, in this case,
|
|---|
| 74 | ; we can get away without setting nargs (since the caller
|
|---|
| 75 | ; called us with 2 args, but that's horrible style.)
|
|---|
| 76 | (ba .SPvalues))
|
|---|
| 77 |
|
|---|
| 78 |
|
|---|
| 79 | ;;; Set the 0th element of DEST (a bignum or some other 32-bit ivector)
|
|---|
| 80 | ;;; to the Ith element of the bignum SRC.
|
|---|
| 81 | (defppclapfunction %ref-digit ((bignum arg_x) (i arg_y) (dest arg_z))
|
|---|
| 82 | (la imm1 ppc32::misc-data-offset i)
|
|---|
| 83 | (lwzx imm0 bignum imm1)
|
|---|
| 84 | (stw imm0 ppc32::misc-data-offset dest)
|
|---|
| 85 | (blr))
|
|---|
| 86 |
|
|---|
| 87 | ;;; BIGNUM[I] := DIGIT[0]
|
|---|
| 88 | (defppclapfunction %set-digit ((bignum arg_x) (i arg_y) (digit arg_z))
|
|---|
| 89 | (la imm1 ppc32::misc-data-offset i)
|
|---|
| 90 | (lwz imm0 ppc32::misc-data-offset digit)
|
|---|
| 91 | (stwx imm0 bignum imm1)
|
|---|
| 92 | (blr))
|
|---|
| 93 |
|
|---|
| 94 | ;;; Return 0 if the 0th digit in X is 0.
|
|---|
| 95 | (defppclapfunction %digit-zerop ((x arg_z))
|
|---|
| 96 | (lwz imm0 ppc32::misc-data-offset x)
|
|---|
| 97 | (cntlzw imm0 imm0)
|
|---|
| 98 | (srwi imm0 imm0 5)
|
|---|
| 99 | (rlwimi imm0 imm0 4 27 27)
|
|---|
| 100 | (addi arg_z imm0 (target-nil-value))
|
|---|
| 101 | (blr))
|
|---|
| 102 |
|
|---|
| 103 | ;;; store the sign of bignum (0 or -1) in the one-word bignum "digit".
|
|---|
| 104 | (defppclapfunction %bignum-sign-digit ((bignum arg_y) (digit arg_z))
|
|---|
| 105 | (vector-length imm0 bignum imm0)
|
|---|
| 106 | (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
|
|---|
| 107 | (lwzx imm0 bignum imm0)
|
|---|
| 108 | (srawi imm0 imm0 31) ;propagate sign bit
|
|---|
| 109 | (stw imm0 ppc32::misc-data-offset digit)
|
|---|
| 110 | (blr))
|
|---|
| 111 |
|
|---|
| 112 | ;;; Return the sign of bignum (0 or -1) as a fixnum
|
|---|
| 113 | (defppclapfunction %bignum-sign ((bignum arg_z))
|
|---|
| 114 | (vector-length imm0 bignum imm0)
|
|---|
| 115 | (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
|
|---|
| 116 | (lwzx imm0 bignum imm0)
|
|---|
| 117 | (srawi imm0 imm0 31) ;propagate sign bit
|
|---|
| 118 | (box-fixnum arg_z imm0)
|
|---|
| 119 | (blr))
|
|---|
| 120 |
|
|---|
| 121 | ;;; Count the sign bits in the most significant digit of bignum;
|
|---|
| 122 | ;;; return fixnum count.
|
|---|
| 123 | (defppclapfunction %bignum-sign-bits ((bignum arg_z))
|
|---|
| 124 | (vector-length imm0 bignum imm0)
|
|---|
| 125 | (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
|
|---|
| 126 | (lwzx imm0 bignum imm0)
|
|---|
| 127 | (cmpwi imm0 0)
|
|---|
| 128 | (not imm0 imm0)
|
|---|
| 129 | (blt @wasneg)
|
|---|
| 130 | (not imm0 imm0)
|
|---|
| 131 | @wasneg
|
|---|
| 132 | (cntlzw imm0 imm0)
|
|---|
| 133 | (box-fixnum arg_z imm0)
|
|---|
| 134 | (blr))
|
|---|
| 135 |
|
|---|
| 136 | (defppclapfunction %digit-0-or-plusp ((bignum arg_y) (idx arg_z))
|
|---|
| 137 | (la imm0 ppc32::misc-data-offset idx)
|
|---|
| 138 | (lwzx imm0 bignum imm0)
|
|---|
| 139 | (xoris imm0 imm0 #x8000) ; invert sign bit
|
|---|
| 140 | (srwi imm0 imm0 31)
|
|---|
| 141 | (bit0->boolean arg_z imm0 imm0) ; return T if sign bit was clear before inversion
|
|---|
| 142 | (blr))
|
|---|
| 143 |
|
|---|
| 144 | ;;; For oddp, evenp
|
|---|
| 145 | (defppclapfunction %bignum-oddp ((bignum arg_z))
|
|---|
| 146 | (lwz imm0 ppc32::misc-data-offset bignum)
|
|---|
| 147 | (clrlwi imm0 imm0 31)
|
|---|
| 148 | (bit0->boolean arg_z imm0 imm0)
|
|---|
| 149 | (blr))
|
|---|
| 150 |
|
|---|
| 151 | (defppclapfunction bignum-plusp ((bignum arg_z))
|
|---|
| 152 | (vector-length imm0 bignum imm0)
|
|---|
| 153 | (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
|
|---|
| 154 | (lwzx imm0 bignum imm0)
|
|---|
| 155 | (xoris imm0 imm0 #x8000) ; invert sign bit
|
|---|
| 156 | (srwi imm0 imm0 31)
|
|---|
| 157 | (bit0->boolean arg_z imm0 imm0) ; return T if sign bit was clear before inversion
|
|---|
| 158 | (blr))
|
|---|
| 159 |
|
|---|
| 160 | (defppclapfunction %fixnum-to-bignum-set ((bignum arg_y) (fixnum arg_z))
|
|---|
| 161 | (unbox-fixnum imm0 fixnum)
|
|---|
| 162 | (stw imm0 ppc32::misc-data-offset bignum)
|
|---|
| 163 | (blr))
|
|---|
| 164 |
|
|---|
| 165 | (defppclapfunction bignum-minusp ((bignum arg_z))
|
|---|
| 166 | (vector-length imm0 bignum imm0)
|
|---|
| 167 | (la imm0 (- ppc32::misc-data-offset 4) imm0) ; Reference last (most significant) digit
|
|---|
| 168 | (lwzx imm0 bignum imm0)
|
|---|
| 169 | (srwi imm0 imm0 31)
|
|---|
| 170 | (rlwimi imm0 imm0 4 27 27)
|
|---|
| 171 | (addi arg_z imm0 (target-nil-value)) ; return T if sign bit was clear before inversion
|
|---|
| 172 | (blr))
|
|---|
| 173 |
|
|---|
| 174 |
|
|---|
| 175 | ;;; Add the digits A[I] and B[J], and the incoming carry C (a fixnum).
|
|---|
| 176 | ;;; Store the result in R[K], and return the outgoing carry.
|
|---|
| 177 | ;;; If I is NIL, A is a fixnum. If J is NIL, B is a fixnum.
|
|---|
| 178 |
|
|---|
| 179 | (defppclapfunction %add-with-carry ((r 12) (k 8) (c 4) (a 0) (i arg_x) (b arg_y) (j arg_z))
|
|---|
| 180 | (cmpwi cr1 j (target-nil-value))
|
|---|
| 181 | (cmpwi cr0 i (target-nil-value))
|
|---|
| 182 | (lwz temp0 a vsp)
|
|---|
| 183 | (unbox-fixnum imm1 temp0)
|
|---|
| 184 | (unbox-fixnum imm2 b)
|
|---|
| 185 | (beq cr0 @got-a)
|
|---|
| 186 | (la imm1 ppc32::misc-data-offset i)
|
|---|
| 187 | (lwzx imm1 temp0 imm1)
|
|---|
| 188 | @got-a
|
|---|
| 189 | (beq cr1 @got-b)
|
|---|
| 190 | (la imm2 ppc32::misc-data-offset j)
|
|---|
| 191 | (lwzx imm2 b imm2)
|
|---|
| 192 | @got-b
|
|---|
| 193 | (lwz temp0 c vsp)
|
|---|
| 194 | (unbox-fixnum imm0 temp0)
|
|---|
| 195 | (addic imm0 imm0 -1)
|
|---|
| 196 | (lwz temp1 r vsp)
|
|---|
| 197 | (lwz temp0 k vsp)
|
|---|
| 198 | (la vsp 16 vsp)
|
|---|
| 199 | (adde imm0 imm1 imm2)
|
|---|
| 200 | (la imm2 ppc32::misc-data-offset temp0)
|
|---|
| 201 | (stwx imm0 temp1 imm2)
|
|---|
| 202 | (addze imm0 rzero)
|
|---|
| 203 | (box-fixnum arg_z imm0)
|
|---|
| 204 | (blr))
|
|---|
| 205 |
|
|---|
| 206 |
|
|---|
| 207 |
|
|---|
| 208 |
|
|---|
| 209 |
|
|---|
| 210 | ;;; Store the result of A[I] - B[J] - borrow into R[K], returning the borrow.
|
|---|
| 211 | ;;; If I is NIL, A is a fixnum; likewise for J and B.
|
|---|
| 212 | (defppclapfunction %subtract-with-borrow ((r 12) (k 8) (borrow 4) (a 0) (i arg_x) (b arg_y) (j arg_z))
|
|---|
| 213 | (cmpwi cr0 i (target-nil-value))
|
|---|
| 214 | (cmpwi cr1 j (target-nil-value))
|
|---|
| 215 | (lwz temp0 a vsp)
|
|---|
| 216 | (unbox-fixnum imm2 b)
|
|---|
| 217 | (unbox-fixnum imm1 temp0)
|
|---|
| 218 | (beq cr1 @got-b)
|
|---|
| 219 | (la imm2 ppc32::misc-data-offset j)
|
|---|
| 220 | (lwzx imm2 b imm2)
|
|---|
| 221 | @got-b
|
|---|
| 222 | (beq cr0 @got-a)
|
|---|
| 223 | (la imm1 ppc32::misc-data-offset i)
|
|---|
| 224 | (lwzx imm1 temp0 imm1)
|
|---|
| 225 | @got-a
|
|---|
| 226 | (lwz temp0 borrow vsp)
|
|---|
| 227 | (unbox-fixnum imm0 temp0)
|
|---|
| 228 | (addic imm0 imm0 -1)
|
|---|
| 229 | (lwz temp0 r vsp)
|
|---|
| 230 | (lwz temp1 k vsp)
|
|---|
| 231 | (la vsp 16 vsp)
|
|---|
| 232 | (subfe imm0 imm2 imm1)
|
|---|
| 233 | (la imm1 ppc32::misc-data-offset temp1)
|
|---|
| 234 | (stwx imm0 temp0 imm1)
|
|---|
| 235 | (addze imm0 rzero)
|
|---|
| 236 | (box-fixnum arg_z imm0)
|
|---|
| 237 | (blr))
|
|---|
| 238 |
|
|---|
| 239 | ;; multiply i'th digit of x by y and add to result starting at digit i
|
|---|
| 240 | (defppclapfunction %multiply-and-add-harder-loop-2
|
|---|
| 241 | ((x-ptr 4) (y-ptr 0) (resptr arg_x)(residx arg_y) (count arg_z))
|
|---|
| 242 | (let ((tem imm0)
|
|---|
| 243 | (y imm1)
|
|---|
| 244 | (prod-h imm2)
|
|---|
| 245 | (prod-l imm3)
|
|---|
| 246 | (x imm4)
|
|---|
| 247 | (xptr temp2)
|
|---|
| 248 | (yidx temp1)
|
|---|
| 249 | (yptr temp0))
|
|---|
| 250 | (lwz xptr x-ptr vsp)
|
|---|
| 251 | (la tem ppc32::misc-data-offset residx)
|
|---|
| 252 | (lwzx x xptr tem)
|
|---|
| 253 | (lwz yptr y-ptr vsp)
|
|---|
| 254 | (li yidx 0) ; init yidx 0
|
|---|
| 255 | (addc prod-h rzero rzero) ; init carry 0, mumble 0
|
|---|
| 256 | @loop
|
|---|
| 257 | (subi count count '1)
|
|---|
| 258 | (cmpwi count 0)
|
|---|
| 259 | (la tem ppc32::misc-data-offset yidx) ; get yidx
|
|---|
| 260 | (lwzx y yptr tem)
|
|---|
| 261 | (mullw prod-l x y)
|
|---|
| 262 | (addc prod-l prod-l prod-h)
|
|---|
| 263 | (mulhwu prod-h x y)
|
|---|
| 264 | (addze prod-h prod-h)
|
|---|
| 265 | (la tem ppc32::misc-data-offset residx)
|
|---|
| 266 | (lwzx y resptr tem)
|
|---|
| 267 | (addc prod-l prod-l y)
|
|---|
| 268 | (addze prod-h prod-h)
|
|---|
| 269 | (stwx prod-l resptr tem)
|
|---|
| 270 | (addi residx residx '1)
|
|---|
| 271 | (addi yidx yidx '1)
|
|---|
| 272 | (bgt @loop)
|
|---|
| 273 | (la tem ppc32::misc-data-offset residx)
|
|---|
| 274 | (stwx prod-h resptr tem)
|
|---|
| 275 | (la vsp 8 vsp)
|
|---|
| 276 | (blr)))
|
|---|
| 277 |
|
|---|
| 278 |
|
|---|
| 279 |
|
|---|
| 280 | ;;; Multiply X[I] by the unboxed value of the (non-negative) fixnum Y;
|
|---|
| 281 | ;;; add the incoming carry from CARRY[0] to the 64-bit product. Store
|
|---|
| 282 | ;;; the low word of the 64-bit sum in R[0] and the high word in
|
|---|
| 283 | ;;; CARRY[0].
|
|---|
| 284 |
|
|---|
| 285 | (defppclapfunction %multiply-and-add ((r 4) (carry 0) (x arg_y) (i arg_x) (y arg_z))
|
|---|
| 286 | (unbox-fixnum imm0 arg_z)
|
|---|
| 287 | (la imm1 ppc32::misc-data-offset i)
|
|---|
| 288 | (lwzx imm1 x imm1)
|
|---|
| 289 | (mulhwu imm2 imm0 imm1)
|
|---|
| 290 | (mullw imm1 imm0 imm1)
|
|---|
| 291 | (lwz temp0 carry vsp)
|
|---|
| 292 | (lwz imm0 ppc32::misc-data-offset temp0)
|
|---|
| 293 | (addc imm1 imm1 imm0)
|
|---|
| 294 | (addze imm2 imm2)
|
|---|
| 295 | (stw imm2 ppc32::misc-data-offset temp0)
|
|---|
| 296 | (lwz arg_z r vsp)
|
|---|
| 297 | (la vsp 8 vsp)
|
|---|
| 298 | (stw imm1 ppc32::misc-data-offset arg_z)
|
|---|
| 299 | (blr))
|
|---|
| 300 |
|
|---|
| 301 | (defppclapfunction %multiply-and-add-fixnum-loop ((len64 0) (x arg_x) (y arg_y) (result arg_z))
|
|---|
| 302 | (let ((carry imm4)
|
|---|
| 303 | (iidx imm3)
|
|---|
| 304 | (unboxed-y imm0)
|
|---|
| 305 | (i temp0)
|
|---|
| 306 | (hi imm2)
|
|---|
| 307 | (rlen temp1))
|
|---|
| 308 | (vpop rlen)
|
|---|
| 309 | (li carry 0)
|
|---|
| 310 | (li iidx ppc32::misc-data-offset)
|
|---|
| 311 | (li i 0)
|
|---|
| 312 | (b @test)
|
|---|
| 313 | @loop
|
|---|
| 314 | (unbox-fixnum unboxed-y y)
|
|---|
| 315 | (lwzx imm1 x iidx)
|
|---|
| 316 | (mulhwu hi imm1 unboxed-y)
|
|---|
| 317 | (mullw imm0 imm1 unboxed-y)
|
|---|
| 318 | (addc imm0 imm0 carry)
|
|---|
| 319 | (addze carry hi)
|
|---|
| 320 | (stwx imm0 result iidx)
|
|---|
| 321 | (la iidx 4 iidx)
|
|---|
| 322 | (la i '1 i)
|
|---|
| 323 | @test
|
|---|
| 324 | (cmpw i rlen)
|
|---|
| 325 | (blt @loop)
|
|---|
| 326 | (stwx carry result iidx)
|
|---|
| 327 | (blr)))
|
|---|
| 328 |
|
|---|
| 329 | (defppclapfunction %floor ((q 4) (r 0) (num-high arg_x) (num-low arg_y) (denom-arg arg_z))
|
|---|
| 330 | (let ((rem imm0)
|
|---|
| 331 | (rem-low imm1)
|
|---|
| 332 | (quo imm2)
|
|---|
| 333 | (temp imm3)
|
|---|
| 334 | (denom imm4))
|
|---|
| 335 | (lwz denom ppc32::misc-data-offset denom)
|
|---|
| 336 | (lwz rem ppc32::misc-data-offset num-high)
|
|---|
| 337 | (lwz rem-low ppc32::misc-data-offset num-low)
|
|---|
| 338 | (mr temp denom)
|
|---|
| 339 | (sltu quo rem denom)
|
|---|
| 340 | (subi temp temp quo)
|
|---|
| 341 | (and temp temp denom)
|
|---|
| 342 | (sub rem temp rem)
|
|---|
| 343 | (li temp0 '32)
|
|---|
| 344 | @loop
|
|---|
| 345 | (subi temp0 temp0 '1)
|
|---|
| 346 | (cmpwi temp0 0)
|
|---|
| 347 | (slwi rem rem 1)
|
|---|
| 348 | (srwi temp rem-low 31)
|
|---|
| 349 | (or rem rem temp)
|
|---|
| 350 | (slwi rem-low rem-low 1)
|
|---|
| 351 | (sltu rem rem denom)
|
|---|
| 352 | (slwi quo quo 1)
|
|---|
| 353 | (or quo quo temp)
|
|---|
| 354 | (subi temp temp 1)
|
|---|
| 355 | (and temp temp denom)
|
|---|
| 356 | (sub rem rem temp)
|
|---|
| 357 | (bne @loop)
|
|---|
| 358 | (not quo quo)
|
|---|
| 359 | (lwz temp0 q vsp)
|
|---|
| 360 | (stw quo ppc32::misc-data-offset temp0)
|
|---|
| 361 | (lwz arg_z r vsp)
|
|---|
| 362 | (la vsp 8 vsp)
|
|---|
| 363 | (stw rem ppc32::misc-data-offset arg_z)
|
|---|
| 364 | (blr)))
|
|---|
| 365 |
|
|---|
| 366 | (defppclapfunction %bignum-ref-hi ((bignum arg_y) (i arg_z))
|
|---|
| 367 | (la imm1 ppc32::misc-data-offset i)
|
|---|
| 368 | (lhzx imm0 bignum imm1)
|
|---|
| 369 | (box-fixnum arg_z imm0)
|
|---|
| 370 | (blr))
|
|---|
| 371 |
|
|---|
| 372 |
|
|---|
| 373 | (defppclapfunction %bignum-set ((bignum 0) (i arg_x) (high arg_y) (low arg_z))
|
|---|
| 374 | (compose-digit imm0 high low)
|
|---|
| 375 | (lwz arg_z bignum vsp)
|
|---|
| 376 | (vset32 imm0 arg_z i imm1)
|
|---|
| 377 | (la vsp 4 vsp)
|
|---|
| 378 | (blr))
|
|---|
| 379 |
|
|---|
| 380 |
|
|---|
| 381 |
|
|---|
| 382 |
|
|---|
| 383 | ; this is silly
|
|---|
| 384 | (defppclapfunction %add-the-carry ((b-h arg_x) (b-l arg_y) (carry-in arg_z))
|
|---|
| 385 | (let ((a imm0)
|
|---|
| 386 | (b imm1)
|
|---|
| 387 | (temp imm2)
|
|---|
| 388 | (c imm3))
|
|---|
| 389 | (compose-digit b b-h b-l)
|
|---|
| 390 | (unbox-fixnum c carry-in)
|
|---|
| 391 | (add b c b)
|
|---|
| 392 | (digit-h temp0 b)
|
|---|
| 393 | (digit-l temp1 b)
|
|---|
| 394 | (vpush temp0)
|
|---|
| 395 | (vpush temp1)
|
|---|
| 396 | (la temp0 8 vsp)
|
|---|
| 397 | (set-nargs 2)
|
|---|
| 398 | (ba .SPvalues)))
|
|---|
| 399 |
|
|---|
| 400 |
|
|---|
| 401 |
|
|---|
| 402 |
|
|---|
| 403 | ;;; %SUBTRACT-WITH-BORROW -- Internal.
|
|---|
| 404 | ;;;
|
|---|
| 405 | ;;; This should be in assembler, and should not cons intermediate results. It
|
|---|
| 406 | ;;; returns a 32bit digit and a borrow resulting from subtracting b from a, and
|
|---|
| 407 | ;;; subtracting a possible incoming borrow.
|
|---|
| 408 | ;;;
|
|---|
| 409 | ;;; We really do: a - b - 1 + borrow, where borrow is either 0 or 1.
|
|---|
| 410 | ;;;
|
|---|
| 411 |
|
|---|
| 412 | (defppclapfunction %subtract-with-borrow-1 ((a-h 4) (a-l 0) (b-h arg_x) (b-l
|
|---|
| 413 | arg_y) (borrow-in arg_z))
|
|---|
| 414 | (let ((a imm0)
|
|---|
| 415 | (b imm1)
|
|---|
| 416 | (temp imm2)
|
|---|
| 417 | (c imm3))
|
|---|
| 418 | (lwz temp0 a-h vsp)
|
|---|
| 419 | (lwz temp1 a-l vsp)
|
|---|
| 420 | (compose-digit a temp0 temp1)
|
|---|
| 421 | (compose-digit b b-h b-l)
|
|---|
| 422 | (unbox-fixnum c borrow-in)
|
|---|
| 423 | (li temp -1)
|
|---|
| 424 | (addc temp c temp)
|
|---|
| 425 | (subfe a b a)
|
|---|
| 426 | (addze c rzero)
|
|---|
| 427 | (box-fixnum c c)
|
|---|
| 428 | (digit-h temp0 a)
|
|---|
| 429 | (digit-l temp1 a)
|
|---|
| 430 | (vpush temp0)
|
|---|
| 431 | (vpush temp1)
|
|---|
| 432 | (vpush c)
|
|---|
| 433 | (la temp0 20 vsp)
|
|---|
| 434 | (set-nargs 3)
|
|---|
| 435 | (ba .SPvalues)))
|
|---|
| 436 |
|
|---|
| 437 |
|
|---|
| 438 |
|
|---|
| 439 | (defppclapfunction %subtract-one ((a-h arg_y)(a-l arg_z))
|
|---|
| 440 | (let ((a imm0))
|
|---|
| 441 | (compose-digit a a-h a-l)
|
|---|
| 442 | (subi a a 1)
|
|---|
| 443 | (digit-h temp0 a)
|
|---|
| 444 | (vpush temp0)
|
|---|
| 445 | (digit-l temp0 a)
|
|---|
| 446 | (vpush temp0)
|
|---|
| 447 | (la temp0 8 vsp)
|
|---|
| 448 | (set-nargs 2)
|
|---|
| 449 | (ba .spvalues)))
|
|---|
| 450 |
|
|---|
| 451 |
|
|---|
| 452 |
|
|---|
| 453 |
|
|---|
| 454 | ;;; %MULTIPLY-AND-ADD -- Internal.
|
|---|
| 455 | ;;;
|
|---|
| 456 | ;;; This multiplies x-digit and y-digit, producing high and low digits
|
|---|
| 457 | ;;; manifesting the result. Then it adds the low digit, res-digit, and
|
|---|
| 458 | ;;; carry-in-digit. Any carries (note, you still have to add two digits at a
|
|---|
| 459 | ;;; time possibly producing two carries) from adding these three digits get
|
|---|
| 460 | ;;; added to the high digit from the multiply, producing the next carry digit.
|
|---|
| 461 | ;;; Res-digit is optional since two uses of this primitive multiplies a single
|
|---|
| 462 | ;;; digit bignum by a multiple digit bignum, and in this situation there is no
|
|---|
| 463 | ;;; need for a result buffer accumulating partial results which is where the
|
|---|
| 464 | ;;; res-digit comes from.
|
|---|
| 465 | ;;; [slh] I assume that the returned carry "digit" can only be 0, 1 or 2
|
|---|
| 466 |
|
|---|
| 467 |
|
|---|
| 468 | (defppclapfunction %multiply-and-add-1 ((x-high 8)
|
|---|
| 469 | (x-low 4)
|
|---|
| 470 | (y-high 0)
|
|---|
| 471 | (y-low arg_x)
|
|---|
| 472 | (carry-in-high arg_y)
|
|---|
| 473 | (carry-in-low arg_z))
|
|---|
| 474 | (let ((x imm0)
|
|---|
| 475 | (y imm1)
|
|---|
| 476 | (carry-in imm2)
|
|---|
| 477 | (lo imm3)
|
|---|
| 478 | (hi imm4))
|
|---|
| 479 | (compose-digit carry-in carry-in-high carry-in-low)
|
|---|
| 480 | (vpop temp0)
|
|---|
| 481 | (compose-digit y temp0 y-low)
|
|---|
| 482 | (vpop temp0)
|
|---|
| 483 | (vpop temp1)
|
|---|
| 484 | (compose-digit x temp1 temp0)
|
|---|
| 485 | (mullw lo x y)
|
|---|
| 486 | (mulhwu hi x y)
|
|---|
| 487 | (addc lo lo carry-in)
|
|---|
| 488 | (addze hi hi)
|
|---|
| 489 | (digit-h temp0 hi)
|
|---|
| 490 | (digit-l temp1 hi)
|
|---|
| 491 | (digit-h temp2 lo)
|
|---|
| 492 | (digit-l temp3 lo)
|
|---|
| 493 | (vpush temp0)
|
|---|
| 494 | (vpush temp1)
|
|---|
| 495 | (vpush temp2)
|
|---|
| 496 | (vpush temp3)
|
|---|
| 497 | (set-nargs 4)
|
|---|
| 498 | (la temp0 16 vsp)
|
|---|
| 499 | (ba .SPvalues)))
|
|---|
| 500 |
|
|---|
| 501 |
|
|---|
| 502 | (defppclapfunction %logcount-complement ((bignum arg_y) (idx arg_z))
|
|---|
| 503 | (let ((arg imm0)
|
|---|
| 504 | (shift imm1)
|
|---|
| 505 | (temp imm2))
|
|---|
| 506 | (la arg ppc32::misc-data-offset idx)
|
|---|
| 507 | (lwzx arg bignum arg)
|
|---|
| 508 | (not. shift arg)
|
|---|
| 509 | (li arg_z 0)
|
|---|
| 510 | (if ne
|
|---|
| 511 | (progn
|
|---|
| 512 | @loop
|
|---|
| 513 | (la temp -1 shift)
|
|---|
| 514 | (and. shift shift temp)
|
|---|
| 515 | (la arg_z '1 arg_z)
|
|---|
| 516 | (bne @loop)))
|
|---|
| 517 | (blr)))
|
|---|
| 518 |
|
|---|
| 519 | (defppclapfunction %logcount ((bignum arg_y) (idx arg_z))
|
|---|
| 520 | (let ((arg imm0)
|
|---|
| 521 | (shift imm1)
|
|---|
| 522 | (temp imm2))
|
|---|
| 523 | (la arg ppc32::misc-data-offset idx)
|
|---|
| 524 | (lwzx arg bignum arg)
|
|---|
| 525 | (mr. shift arg)
|
|---|
| 526 | (li arg_z 0)
|
|---|
| 527 | (if ne
|
|---|
| 528 | (progn
|
|---|
| 529 | @loop
|
|---|
| 530 | (la temp -1 shift)
|
|---|
| 531 | (and. shift shift temp)
|
|---|
| 532 | (la arg_z '1 arg_z)
|
|---|
| 533 | (bne @loop)))
|
|---|
| 534 | (blr)))
|
|---|
| 535 |
|
|---|
| 536 | ; return res
|
|---|
| 537 | (defppclapfunction bignum-add-loop-2 ((aptr arg_x)(bptr arg_y) (result arg_z))
|
|---|
| 538 | (let ((idx imm0)
|
|---|
| 539 | (count imm1)
|
|---|
| 540 | (x imm2)
|
|---|
| 541 | (y imm3)
|
|---|
| 542 | (len-a temp0)
|
|---|
| 543 | (len-b temp1)
|
|---|
| 544 | (tem temp2))
|
|---|
| 545 | (li idx ppc32::misc-data-offset)
|
|---|
| 546 | (lwz imm4 ppc32::misc-header-offset aptr)
|
|---|
| 547 | (header-length len-a imm4)
|
|---|
| 548 | (lwz imm4 ppc32::misc-header-offset bptr)
|
|---|
| 549 | (header-length len-b imm4)
|
|---|
| 550 | ; make a be shorter one
|
|---|
| 551 | (cmpw len-a len-b)
|
|---|
| 552 | (li count 0)
|
|---|
| 553 | ; initialize carry 0
|
|---|
| 554 | (addc x rzero rzero)
|
|---|
| 555 | (ble @loop)
|
|---|
| 556 | ; b shorter - swap em
|
|---|
| 557 | (mr tem len-a)
|
|---|
| 558 | (mr len-a len-b)
|
|---|
| 559 | (mr len-b tem)
|
|---|
| 560 | (mr tem aptr)
|
|---|
| 561 | (mr aptr bptr)
|
|---|
| 562 | (mr bptr tem)
|
|---|
| 563 | @loop
|
|---|
| 564 | (lwzx y aptr idx)
|
|---|
| 565 | (lwzx x bptr idx)
|
|---|
| 566 | (addi count count '1)
|
|---|
| 567 | (cmpw count len-a)
|
|---|
| 568 | (adde x x y)
|
|---|
| 569 | (stwx x result idx)
|
|---|
| 570 | (addi idx idx '1)
|
|---|
| 571 | (blt @loop)
|
|---|
| 572 | ; now propagate carry thru longer (b) using sign of shorter
|
|---|
| 573 | ;(SUBI imm4 idx '1) ; y has hi order word of a
|
|---|
| 574 | ;(lwzx y aptr imm4)
|
|---|
| 575 | (cmpw len-a len-b)
|
|---|
| 576 | (adde imm4 rzero rzero) ; get carry
|
|---|
| 577 | (srawi y y 31) ; p.o.s clobbers carry
|
|---|
| 578 | (addic imm4 imm4 -1) ; restore carry
|
|---|
| 579 | (beq @l3) ; unless equal
|
|---|
| 580 | @loop2
|
|---|
| 581 | (lwzx x bptr idx)
|
|---|
| 582 | (adde x x y)
|
|---|
| 583 | (stwx x result idx)
|
|---|
| 584 | (addi count count '1)
|
|---|
| 585 | (cmpw count len-b)
|
|---|
| 586 | (addi idx idx '1)
|
|---|
| 587 | (blt @loop2)
|
|---|
| 588 | ; y has sign of shorter - get sign of longer to x
|
|---|
| 589 | @l3
|
|---|
| 590 | (subi imm4 idx '1)
|
|---|
| 591 | (lwzx x bptr imm4)
|
|---|
| 592 | (adde imm4 rzero rzero) ; get carry
|
|---|
| 593 | (srawi x x 31) ; clobbers carry
|
|---|
| 594 | (addic imm4 imm4 -1)
|
|---|
| 595 | (adde x x y)
|
|---|
| 596 | (stwx x result idx)
|
|---|
| 597 | (blr)))
|
|---|
| 598 |
|
|---|
| 599 | ;; same as above but with initial a index and finishes
|
|---|
| 600 | (defppclapfunction bignum-add-loop-+ ((init-a 0)(aptr arg_x)(bptr arg_y)(length arg_z))
|
|---|
| 601 | (let ((idx imm0)
|
|---|
| 602 | (count imm1)
|
|---|
| 603 | (x imm2)
|
|---|
| 604 | (y imm3)
|
|---|
| 605 | (aidx imm4))
|
|---|
| 606 | (li idx ppc32::misc-data-offset)
|
|---|
| 607 | (lwz aidx init-a vsp)
|
|---|
| 608 | (addi aidx aidx ppc32::misc-data-offset)
|
|---|
| 609 | (li count 0)
|
|---|
| 610 | ; initialize carry 0
|
|---|
| 611 | (addc x rzero rzero)
|
|---|
| 612 | @loop
|
|---|
| 613 | (lwzx x aptr aidx)
|
|---|
| 614 | (lwzx y bptr idx)
|
|---|
| 615 | (adde x x y)
|
|---|
| 616 | (stwx x aptr aidx)
|
|---|
| 617 | (addi count count '1)
|
|---|
| 618 | (cmpw count length)
|
|---|
| 619 | (addi idx idx '1)
|
|---|
| 620 | (addi aidx aidx '1)
|
|---|
| 621 | (blt @loop)
|
|---|
| 622 | (lwzx x aptr aidx) ; add carry into next one
|
|---|
| 623 | (adde x x rzero)
|
|---|
| 624 | (stwx x aptr aidx)
|
|---|
| 625 | (la vsp 4 vsp)
|
|---|
| 626 | (blr)))
|
|---|
| 627 |
|
|---|
| 628 |
|
|---|
| 629 |
|
|---|
| 630 | (defppclapfunction bignum-negate-loop-really ((big arg_x) (len arg_y) (result arg_z))
|
|---|
| 631 | (let ((idx imm0)
|
|---|
| 632 | (one imm1)
|
|---|
| 633 | (x imm2))
|
|---|
| 634 | (li idx ppc32::misc-data-offset)
|
|---|
| 635 | (li one '1)
|
|---|
| 636 | ; initialize carry 1
|
|---|
| 637 | (li x -1)
|
|---|
| 638 | (addic x x 1)
|
|---|
| 639 | @loop
|
|---|
| 640 | ;(addi count count '1)
|
|---|
| 641 | ;(cmpw count len)
|
|---|
| 642 | (subf. len one len)
|
|---|
| 643 | (lwzx x big idx)
|
|---|
| 644 | (not x x)
|
|---|
| 645 | (adde x x rzero)
|
|---|
| 646 | (stwx x result idx)
|
|---|
| 647 | (addi idx idx '1)
|
|---|
| 648 | (bgt @loop)
|
|---|
| 649 | ; return carry
|
|---|
| 650 | (li x 0)
|
|---|
| 651 | (adde x x rzero)
|
|---|
| 652 | (box-fixnum arg_z x)
|
|---|
| 653 | (blr)))
|
|---|
| 654 |
|
|---|
| 655 | (defppclapfunction bignum-negate-to-pointer ((big arg_x) (len arg_y) (result arg_z))
|
|---|
| 656 | (let ((idx imm0)
|
|---|
| 657 | (one imm1)
|
|---|
| 658 | (x imm2)
|
|---|
| 659 | (oidx imm3)
|
|---|
| 660 | (ptr imm4))
|
|---|
| 661 | (li idx ppc32::misc-data-offset)
|
|---|
| 662 | (li oidx 0)
|
|---|
| 663 | (macptr-ptr ptr result)
|
|---|
| 664 | (li one '1)
|
|---|
| 665 | ; initialize carry 1
|
|---|
| 666 | (li x -1)
|
|---|
| 667 | (addic x x 1)
|
|---|
| 668 | @loop
|
|---|
| 669 | ;(addi count count '1)
|
|---|
| 670 | ;(cmpw count len)
|
|---|
| 671 | (subf. len one len)
|
|---|
| 672 | (lwzx x big idx)
|
|---|
| 673 | (not x x)
|
|---|
| 674 | (adde x x rzero)
|
|---|
| 675 | (stwx x ptr oidx)
|
|---|
| 676 | (addi idx idx '1)
|
|---|
| 677 | (addi oidx oidx 4)
|
|---|
| 678 | (bgt @loop)
|
|---|
| 679 | ; return carry
|
|---|
| 680 | (li x 0)
|
|---|
| 681 | (adde x x rzero)
|
|---|
| 682 | (box-fixnum arg_z x)
|
|---|
| 683 | (blr)))
|
|---|
| 684 |
|
|---|
| 685 | ;; she do tolerate len = jidx
|
|---|
| 686 | (defppclapfunction bignum-shift-left-loop ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (jidx arg_z))
|
|---|
| 687 | (let ((y imm0)
|
|---|
| 688 | (idx imm1)
|
|---|
| 689 | (bits imm2)
|
|---|
| 690 | (rbits imm3)
|
|---|
| 691 | (x imm4)
|
|---|
| 692 | (iidx temp0)
|
|---|
| 693 | (resptr temp1))
|
|---|
| 694 | (li iidx 0)
|
|---|
| 695 | (lwz bits nbits vsp)
|
|---|
| 696 | (lwz resptr result vsp)
|
|---|
| 697 | (unbox-fixnum bits bits)
|
|---|
| 698 | (subfic rbits bits 32)
|
|---|
| 699 | ;(dbg)
|
|---|
| 700 | (lwz imm4 ppc32::misc-data-offset bignum)
|
|---|
| 701 | (slw imm4 imm4 bits)
|
|---|
| 702 | (la y (+ ppc32::misc-data-offset -4) jidx)
|
|---|
| 703 | (stwx imm4 y resptr)
|
|---|
| 704 |
|
|---|
| 705 | (cmpw len jidx)
|
|---|
| 706 | (beq @done)
|
|---|
| 707 | @loop
|
|---|
| 708 | (addi idx iidx ppc32::misc-data-offset)
|
|---|
| 709 | (lwzx x bignum idx)
|
|---|
| 710 | (srw x x rbits)
|
|---|
| 711 | (addi idx idx '1)
|
|---|
| 712 | (lwzx y bignum idx)
|
|---|
| 713 | (slw y y bits)
|
|---|
| 714 | (or x x y)
|
|---|
| 715 | (addi idx jidx ppc32::misc-data-offset)
|
|---|
| 716 | (stwx x resptr idx)
|
|---|
| 717 | (addi jidx jidx '1)
|
|---|
| 718 | (cmpw jidx len)
|
|---|
| 719 | (addi iidx iidx '1)
|
|---|
| 720 | (blt @loop)
|
|---|
| 721 | @done
|
|---|
| 722 | ; do first - lo order
|
|---|
| 723 |
|
|---|
| 724 | ; do last - hi order
|
|---|
| 725 | (addi idx iidx ppc32::misc-data-offset)
|
|---|
| 726 | ;(dbg t)
|
|---|
| 727 | (lwzx y bignum idx)
|
|---|
| 728 | (sraw y y rbits)
|
|---|
| 729 | (addi idx len ppc32::misc-data-offset)
|
|---|
| 730 | (stwx y resptr idx)
|
|---|
| 731 | (la vsp 8 vsp)
|
|---|
| 732 | (blr)))
|
|---|
| 733 |
|
|---|
| 734 |
|
|---|
| 735 |
|
|---|
| 736 | (defppclapfunction bignum-shift-right-loop-1 ((nbits 4)(result 0) (bignum arg_x) (len arg_y) (iidx arg_z))
|
|---|
| 737 | (let ((y imm0)
|
|---|
| 738 | (idx imm1)
|
|---|
| 739 | (bits imm2)
|
|---|
| 740 | (rbits imm3)
|
|---|
| 741 | (x imm4)
|
|---|
| 742 | (jidx temp0)
|
|---|
| 743 | (resptr temp1))
|
|---|
| 744 | (li jidx 0)
|
|---|
| 745 | (lwz bits nbits vsp)
|
|---|
| 746 | (lwz resptr result vsp)
|
|---|
| 747 | (unbox-fixnum bits bits)
|
|---|
| 748 | (cmpw jidx len)
|
|---|
| 749 | (subfic rbits bits 32)
|
|---|
| 750 | (bge @done)
|
|---|
| 751 | @loop
|
|---|
| 752 | (addi idx iidx ppc32::misc-data-offset)
|
|---|
| 753 | (lwzx x bignum idx)
|
|---|
| 754 | (srw x x bits)
|
|---|
| 755 | (addi idx idx '1)
|
|---|
| 756 | (lwzx y bignum idx)
|
|---|
| 757 | (slw y y rbits)
|
|---|
| 758 | (or x x y)
|
|---|
| 759 | (addi idx jidx ppc32::misc-data-offset)
|
|---|
| 760 | (stwx x resptr idx)
|
|---|
| 761 | (addi jidx jidx '1)
|
|---|
| 762 | (cmpw jidx len)
|
|---|
| 763 | (addi iidx iidx '1)
|
|---|
| 764 | (blt @loop)
|
|---|
| 765 | @done
|
|---|
| 766 | (addi idx iidx ppc32::misc-data-offset)
|
|---|
| 767 | (lwzx x bignum idx)
|
|---|
| 768 | (sraw x x bits)
|
|---|
| 769 | (addi idx jidx ppc32::misc-data-offset)
|
|---|
| 770 | (stwx x resptr idx)
|
|---|
| 771 | (la vsp 8 vsp)
|
|---|
| 772 | (blr)))
|
|---|
| 773 |
|
|---|
| 774 |
|
|---|
| 775 | (defppclapfunction %compare-digits ((a arg_x) (b arg_y) (idx arg_z))
|
|---|
| 776 | (la imm0 ppc32::misc-data-offset idx)
|
|---|
| 777 | (lwzx imm1 a imm0)
|
|---|
| 778 | (lwzx imm0 b imm0)
|
|---|
| 779 | (cmplw imm1 imm0)
|
|---|
| 780 | (li arg_z '0)
|
|---|
| 781 | (beqlr)
|
|---|
| 782 | (li arg_z '1)
|
|---|
| 783 | (bgtlr)
|
|---|
| 784 | (li arg_z '-1)
|
|---|
| 785 | (blr))
|
|---|
| 786 |
|
|---|
| 787 |
|
|---|
| 788 |
|
|---|
| 789 | ;; returns number of bits in digit-hi,digit-lo that are sign bits
|
|---|
| 790 | ;; 32 - digits-sign-bits is integer-length
|
|---|
| 791 |
|
|---|
| 792 | (defppclapfunction %digits-sign-bits ((hi arg_y) (lo arg_z))
|
|---|
| 793 | (rlwinm. imm1 hi (- 16 ppc32::fixnumshift) 0 15)
|
|---|
| 794 | (rlwimi imm1 lo (- 32 ppc32::fixnumshift) 16 31)
|
|---|
| 795 | (not imm1 imm1)
|
|---|
| 796 | (blt @wasneg)
|
|---|
| 797 | (not imm1 imm1)
|
|---|
| 798 | @wasneg
|
|---|
| 799 | (cntlzw imm1 imm1)
|
|---|
| 800 | (box-fixnum arg_z imm1)
|
|---|
| 801 | (blr))
|
|---|
| 802 |
|
|---|
| 803 | (defppclapfunction bignum-logtest-loop ((count arg_x) (s1 arg_y) (s2 arg_z))
|
|---|
| 804 | (addi imm1 rzero ppc32::misc-data-offset)
|
|---|
| 805 | @loop
|
|---|
| 806 | (lwzx imm2 s1 imm1)
|
|---|
| 807 | (lwzx imm3 s2 imm1)
|
|---|
| 808 | (and. imm2 imm3 imm2)
|
|---|
| 809 | (addi imm1 imm1 4)
|
|---|
| 810 | (bne @true)
|
|---|
| 811 | (subic. count count 4)
|
|---|
| 812 | (bgt @loop)
|
|---|
| 813 | (li arg_z (target-nil-value))
|
|---|
| 814 | (blr)
|
|---|
| 815 | @true
|
|---|
| 816 | (li arg_z (+ (target-nil-value) ppc32::t-offset))
|
|---|
| 817 | (blr))
|
|---|
| 818 |
|
|---|
| 819 | ;;; dest[idx] <- (lognot src[idx])
|
|---|
| 820 | (defppclapfunction %bignum-lognot ((idx arg_x) (src arg_y) (dest arg_z))
|
|---|
| 821 | (la imm1 ppc32::misc-data-offset idx)
|
|---|
| 822 | (lwzx imm0 src imm1)
|
|---|
| 823 | (not imm0 imm0)
|
|---|
| 824 | (stwx imm0 dest imm1)
|
|---|
| 825 | (blr))
|
|---|
| 826 |
|
|---|
| 827 | ;;; dest[idx] <- (logand x[idx] y[idx])
|
|---|
| 828 | (defppclapfunction %bignum-logand ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
|
|---|
| 829 | (vpop temp0)
|
|---|
| 830 | (la imm1 ppc32::misc-data-offset temp0)
|
|---|
| 831 | (lwzx imm0 x imm1)
|
|---|
| 832 | (lwzx imm2 y imm1)
|
|---|
| 833 | (and imm0 imm0 imm2)
|
|---|
| 834 | (stwx imm0 dest imm1)
|
|---|
| 835 | (blr))
|
|---|
| 836 |
|
|---|
| 837 | ;;; dest[idx] <- (logandc2 x[idx] y[idx])
|
|---|
| 838 | (defppclapfunction %bignum-logandc2 ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
|
|---|
| 839 | (vpop temp0)
|
|---|
| 840 | (la imm1 ppc32::misc-data-offset temp0)
|
|---|
| 841 | (lwzx imm0 x imm1)
|
|---|
| 842 | (lwzx imm2 y imm1)
|
|---|
| 843 | (andc imm0 imm0 imm2)
|
|---|
| 844 | (stwx imm0 dest imm1)
|
|---|
| 845 | (blr))
|
|---|
| 846 |
|
|---|
| 847 | ;;; dest[idx] <- (logandc1 x[idx] y[idx])
|
|---|
| 848 | (defppclapfunction %bignum-logandc1 ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
|
|---|
| 849 | (vpop temp0)
|
|---|
| 850 | (la imm1 ppc32::misc-data-offset temp0)
|
|---|
| 851 | (lwzx imm0 x imm1)
|
|---|
| 852 | (lwzx imm2 y imm1)
|
|---|
| 853 | (andc imm0 imm2 imm0)
|
|---|
| 854 | (stwx imm0 dest imm1)
|
|---|
| 855 | (blr))
|
|---|
| 856 |
|
|---|
| 857 |
|
|---|
| 858 |
|
|---|
| 859 | (defppclapfunction digit-lognot-move ((index arg_x) (source arg_y) (dest arg_z))
|
|---|
| 860 | (let ((scaled-index imm1))
|
|---|
| 861 | (vref32 imm0 source index scaled-index) ; imm1 has c(index) + data-offset
|
|---|
| 862 | (not imm0 imm0)
|
|---|
| 863 | (stwx imm0 dest scaled-index)
|
|---|
| 864 | (blr)))
|
|---|
| 865 |
|
|---|
| 866 | ; if dest not nil store unboxed result in dest(0), else return boxed result
|
|---|
| 867 | (defppclapfunction fix-digit-logandc2 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
|
|---|
| 868 | (let ((w1 imm0)
|
|---|
| 869 | (w2 imm1))
|
|---|
| 870 | (unbox-fixnum w1 fix)
|
|---|
| 871 | (lwz w2 ppc32::misc-data-offset big)
|
|---|
| 872 | (cmpwi dest (target-nil-value))
|
|---|
| 873 | (not w2 w2)
|
|---|
| 874 | (and w1 w1 w2)
|
|---|
| 875 | (bne @store)
|
|---|
| 876 | (box-fixnum arg_z w1)
|
|---|
| 877 | (blr)
|
|---|
| 878 | @store
|
|---|
| 879 | (stw w1 ppc32::misc-data-offset dest)
|
|---|
| 880 | (blr)))
|
|---|
| 881 |
|
|---|
| 882 |
|
|---|
| 883 |
|
|---|
| 884 | (defppclapfunction fix-digit-logand ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
|
|---|
| 885 | (let ((w1 imm0)
|
|---|
| 886 | (w2 imm1))
|
|---|
| 887 | (unbox-fixnum w1 fix)
|
|---|
| 888 | (lwz w2 ppc32::misc-data-offset big)
|
|---|
| 889 | (cmpwi dest (target-nil-value))
|
|---|
| 890 | (and w1 w1 w2)
|
|---|
| 891 | (bne @store)
|
|---|
| 892 | (box-fixnum arg_z w1)
|
|---|
| 893 | (blr)
|
|---|
| 894 | @store
|
|---|
| 895 | (stw w1 ppc32::misc-data-offset dest)
|
|---|
| 896 | (blr)))
|
|---|
| 897 |
|
|---|
| 898 |
|
|---|
| 899 |
|
|---|
| 900 | (defppclapfunction fix-digit-logandc1 ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
|
|---|
| 901 | (let ((w1 imm0)
|
|---|
| 902 | (w2 imm1))
|
|---|
| 903 | (unbox-fixnum w1 fix)
|
|---|
| 904 | (lwz w2 ppc32::misc-data-offset big)
|
|---|
| 905 | (cmpwi dest (target-nil-value))
|
|---|
| 906 | (not w1 w1)
|
|---|
| 907 | (and w1 w1 w2)
|
|---|
| 908 | (bne @store)
|
|---|
| 909 | (box-fixnum arg_z w1)
|
|---|
| 910 | (blr)
|
|---|
| 911 | @store
|
|---|
| 912 | (stw w1 ppc32::misc-data-offset dest)
|
|---|
| 913 | (blr)))
|
|---|
| 914 |
|
|---|
| 915 | ;;; dest[idx] <- (logior x[idx] y[idx])
|
|---|
| 916 | (defppclapfunction %bignum-logior ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
|
|---|
| 917 | (vpop temp0)
|
|---|
| 918 | (la imm1 ppc32::misc-data-offset temp0)
|
|---|
| 919 | (lwzx imm0 x imm1)
|
|---|
| 920 | (lwzx imm2 y imm1)
|
|---|
| 921 | (or imm0 imm0 imm2)
|
|---|
| 922 | (stwx imm0 dest imm1)
|
|---|
| 923 | (blr))
|
|---|
| 924 |
|
|---|
| 925 | ;;; dest[idx] <- (logxor x[idx] y[idx])
|
|---|
| 926 | (defppclapfunction %bignum-logxor ((idx 0) (x arg_x) (y arg_y) (dest arg_z))
|
|---|
| 927 | (vpop temp0)
|
|---|
| 928 | (la imm1 ppc32::misc-data-offset temp0)
|
|---|
| 929 | (lwzx imm0 x imm1)
|
|---|
| 930 | (lwzx imm2 y imm1)
|
|---|
| 931 | (xor imm0 imm0 imm2)
|
|---|
| 932 | (stwx imm0 dest imm1)
|
|---|
| 933 | (blr))
|
|---|
| 934 |
|
|---|
| 935 |
|
|---|
| 936 |
|
|---|
| 937 | (defppclapfunction bignum-xor-loop ((count 0) (s1 arg_x) (s2 arg_y) (dest arg_z))
|
|---|
| 938 | (lwz imm0 count vsp)
|
|---|
| 939 | (addi imm1 rzero ppc32::misc-data-offset)
|
|---|
| 940 | @loop
|
|---|
| 941 | (lwzx imm2 s1 imm1)
|
|---|
| 942 | (lwzx imm3 s2 imm1)
|
|---|
| 943 | (xor imm2 imm3 imm2)
|
|---|
| 944 | (subic. imm0 imm0 4)
|
|---|
| 945 | (stwx imm2 dest imm1)
|
|---|
| 946 | (addi imm1 imm1 4)
|
|---|
| 947 | (bgt @loop)
|
|---|
| 948 | @out
|
|---|
| 949 | (la vsp 4 vsp)
|
|---|
| 950 | (blr))
|
|---|
| 951 |
|
|---|
| 952 | #+nomore
|
|---|
| 953 | (defppclapfunction try-guess-loop-1 ((guess-h 8)(guess-l 4)(len-y 0)
|
|---|
| 954 | (xidx arg_x) (xptr arg_y) (yptr arg_z))
|
|---|
| 955 | (let ((guess imm0)
|
|---|
| 956 | (carry imm1)
|
|---|
| 957 | (y imm2)
|
|---|
| 958 | (x imm2)
|
|---|
| 959 | (prod-l imm3)
|
|---|
| 960 | (prod-h imm4)
|
|---|
| 961 | (tem imm4)
|
|---|
| 962 | (yidx temp0)
|
|---|
| 963 | (end-y temp1)
|
|---|
| 964 | (carry-bit temp2))
|
|---|
| 965 | (lwz x guess-h vsp)
|
|---|
| 966 | (lwz tem guess-l vsp)
|
|---|
| 967 | (compose-digit guess x tem)
|
|---|
| 968 | (lwz end-y len-y vsp)
|
|---|
| 969 | (li yidx 0)
|
|---|
| 970 | (li carry 0)
|
|---|
| 971 | (li carry-bit '1)
|
|---|
| 972 | @loop
|
|---|
| 973 | ; multiply guess by ydigit, add carry to lo, hi is new carry
|
|---|
| 974 | ; then get an xdigit subtract prod-lo from it and store result in x (remember carry)
|
|---|
| 975 | (addi tem yidx ppc32::misc-data-offset) ; get yidx
|
|---|
| 976 | (lwzx y yptr tem)
|
|---|
| 977 | (mullw prod-l guess y)
|
|---|
| 978 | (mulhwu prod-h guess y)
|
|---|
| 979 | (addc prod-l prod-l carry)
|
|---|
| 980 | (adde carry prod-h rzero)
|
|---|
| 981 | ; get back saved carry
|
|---|
| 982 | (li tem '-1)
|
|---|
| 983 | (addc tem carry-bit tem)
|
|---|
| 984 | (addi tem xidx ppc32::misc-data-offset)
|
|---|
| 985 | (lwzx x xptr tem)
|
|---|
| 986 | (subfe x prod-l x)
|
|---|
| 987 | (stwx x xptr tem)
|
|---|
| 988 | ; save carry
|
|---|
| 989 | (adde prod-l rzero rzero)
|
|---|
| 990 | (box-fixnum carry-bit prod-l)
|
|---|
| 991 | (addi yidx yidx '1)
|
|---|
| 992 | (cmpw yidx end-y)
|
|---|
| 993 | (addi xidx xidx '1)
|
|---|
| 994 | (blt @loop)
|
|---|
| 995 | ; finally subtract carry from last x digit
|
|---|
| 996 | @done
|
|---|
| 997 | (li prod-l '-1) ; get back saved carry again - box clobbered it?
|
|---|
| 998 | (addc prod-l carry-bit prod-l)
|
|---|
| 999 | (addi tem xidx ppc32::misc-data-offset) ; maybe still there - nope
|
|---|
| 1000 | (lwzx x xptr tem)
|
|---|
| 1001 | (subfe x carry x)
|
|---|
| 1002 | (stwx x xptr tem)
|
|---|
| 1003 | (la vsp 12 vsp)
|
|---|
| 1004 | (blr)))
|
|---|
| 1005 |
|
|---|
| 1006 | ;; x0 is at index, x1 at index-1, x2 at index-2
|
|---|
| 1007 | ;; y1 is at index, y2 at index-1
|
|---|
| 1008 | ;; this doesnt help much
|
|---|
| 1009 | (defppclapfunction truncate-guess-loop ((guess-h 8)(guess-l 4)(x 0)
|
|---|
| 1010 | (xidx arg_x)(yptr arg_y) (yidx arg_z))
|
|---|
| 1011 | (let ((guess imm0)
|
|---|
| 1012 | (y1 imm1)
|
|---|
| 1013 | (y2 imm1)
|
|---|
| 1014 | (gy1-lo imm2) ; look out below
|
|---|
| 1015 | (gy1-hi imm2)
|
|---|
| 1016 | (gy2-lo imm2)
|
|---|
| 1017 | (gy2-hi imm2)
|
|---|
| 1018 | (xptr temp0)
|
|---|
| 1019 | (m imm3)
|
|---|
| 1020 | (tem imm4)
|
|---|
| 1021 | (y1-idx 28)
|
|---|
| 1022 | (y2-idx 24)
|
|---|
| 1023 | (x0-idx 20)
|
|---|
| 1024 | (x1-idx 16)
|
|---|
| 1025 | (x2-idx 12))
|
|---|
| 1026 | (stwu tsp -32 tsp)
|
|---|
| 1027 | (stw tsp 4 tsp)
|
|---|
| 1028 | (lwz y1 guess-h vsp)
|
|---|
| 1029 | (lwz tem guess-l vsp)
|
|---|
| 1030 | (compose-digit guess y1 tem)
|
|---|
| 1031 | (addi tem yidx ppc32::misc-data-offset)
|
|---|
| 1032 | (lwzx y1 yptr tem)
|
|---|
| 1033 | (stw y1 y1-idx tsp)
|
|---|
| 1034 | (subi tem tem 4)
|
|---|
| 1035 | (lwzx y2 yptr tem)
|
|---|
| 1036 | (stw y2 y2-idx tsp)
|
|---|
| 1037 | (lwz xptr x vsp)
|
|---|
| 1038 | (addi tem xidx ppc32::misc-data-offset)
|
|---|
| 1039 | (lwzx y1 xptr tem) ; its x0
|
|---|
| 1040 | (stw y1 x0-idx tsp)
|
|---|
| 1041 | (subi tem tem 4)
|
|---|
| 1042 | (lwzx y1 xptr tem)
|
|---|
| 1043 | (stw y1 x1-idx tsp)
|
|---|
| 1044 | (subi tem tem 4)
|
|---|
| 1045 | (lwzx y1 xptr tem)
|
|---|
| 1046 | (stw y1 x2-idx tsp)
|
|---|
| 1047 | @loop
|
|---|
| 1048 | (lwz y1 y1-idx tsp) ; get y1
|
|---|
| 1049 | (mullw gy1-lo guess y1)
|
|---|
| 1050 | (lwz m x1-idx tsp) ; get x1
|
|---|
| 1051 | (subc m m gy1-lo) ; x1 - gy1-lo => m
|
|---|
| 1052 | (mulhwu gy1-hi guess y1)
|
|---|
| 1053 | (lwz tem x0-idx tsp) ; get x0
|
|---|
| 1054 | (subfe. tem gy1-hi tem) ; - val not used just cr
|
|---|
| 1055 | (lwz y2 y2-idx tsp) ; get y2
|
|---|
| 1056 | (mulhwu gy2-hi guess y2) ; does it pay to do this now even tho may not need?
|
|---|
| 1057 | (bne @done)
|
|---|
| 1058 | (cmpl :cr0 gy2-hi m) ; if > or = and foo then more - L means logical means unsigned
|
|---|
| 1059 | (blt @done) ; if < done
|
|---|
| 1060 | (bne @more) ; if = test lo
|
|---|
| 1061 | (mullw gy2-lo guess y2)
|
|---|
| 1062 | (lwz tem x2-idx tsp) ; get x2
|
|---|
| 1063 | (cmpl :cr0 gy2-lo tem)
|
|---|
| 1064 | (ble @done)
|
|---|
| 1065 | @more
|
|---|
| 1066 | (subi guess guess 1)
|
|---|
| 1067 | (b @loop)
|
|---|
| 1068 | @done
|
|---|
| 1069 | (digit-h temp0 guess)
|
|---|
| 1070 | (vpush temp0)
|
|---|
| 1071 | (digit-l temp0 guess)
|
|---|
| 1072 | (vpush temp0)
|
|---|
| 1073 | (la temp0 20 vsp)
|
|---|
| 1074 | (lwz tsp 0 tsp)
|
|---|
| 1075 | (set-nargs 2)
|
|---|
| 1076 | (ba .spvalues)))
|
|---|
| 1077 |
|
|---|
| 1078 | (defppclapfunction normalize-bignum-loop ((sign arg_x)(res arg_y)(len arg_z))
|
|---|
| 1079 | (let ((idx imm0)
|
|---|
| 1080 | (usign imm1)
|
|---|
| 1081 | (val imm2))
|
|---|
| 1082 | (unbox-fixnum usign sign)
|
|---|
| 1083 | (cmpwi len 0)
|
|---|
| 1084 | (addi idx len (- ppc32::misc-data-offset 4))
|
|---|
| 1085 | (beqlr) ; huh - can this ever happen?
|
|---|
| 1086 | @loop
|
|---|
| 1087 | (lwzx val res idx)
|
|---|
| 1088 | (cmpw val usign)
|
|---|
| 1089 | (subi idx idx '1)
|
|---|
| 1090 | (bne @neq)
|
|---|
| 1091 | (subic. len len '1)
|
|---|
| 1092 | (bgt @loop)
|
|---|
| 1093 | ; fall through - its all sign - return 1
|
|---|
| 1094 | (li arg_z '1)
|
|---|
| 1095 | (blr)
|
|---|
| 1096 | @neq
|
|---|
| 1097 | (rlwinm usign usign 0 0 0) ; hi bit
|
|---|
| 1098 | (rlwinm val val 0 0 0)
|
|---|
| 1099 | (cmpw usign val) ; is hi bit = sign, if so then done
|
|---|
| 1100 | (beqlr)
|
|---|
| 1101 | (addi len len '1) ; if not, need 1 more
|
|---|
| 1102 | (blr)))
|
|---|
| 1103 |
|
|---|
| 1104 | (defppclapfunction %normalize-bignum-2 ((fixp arg_y)(res arg_z))
|
|---|
| 1105 | (let ((idx imm0)
|
|---|
| 1106 | (usign imm1)
|
|---|
| 1107 | (val imm2)
|
|---|
| 1108 | (len arg_x)
|
|---|
| 1109 | (oldlen temp0))
|
|---|
| 1110 | (lwz imm4 (- ppc32::fulltag-misc) res)
|
|---|
| 1111 | (header-length len imm4)
|
|---|
| 1112 | (cmpwi len 0)
|
|---|
| 1113 | (mr oldlen len)
|
|---|
| 1114 | (addi idx len (- ppc32::misc-data-offset 4))
|
|---|
| 1115 | (beqlr) ; huh - can this ever happen?
|
|---|
| 1116 | (lwzx val res idx) ; high order word
|
|---|
| 1117 | (srawi usign val 31) ; get sign
|
|---|
| 1118 | @loop
|
|---|
| 1119 | (lwzx val res idx)
|
|---|
| 1120 | (cmpw val usign)
|
|---|
| 1121 | (subi idx idx '1)
|
|---|
| 1122 | (bne @neq)
|
|---|
| 1123 | (subic. len len '1)
|
|---|
| 1124 | (bgt @loop)
|
|---|
| 1125 | ; fall through - its all sign - return 1
|
|---|
| 1126 | (li len '1)
|
|---|
| 1127 | (rlwinm usign usign 0 0 0) ; hi bit
|
|---|
| 1128 | (b @more)
|
|---|
| 1129 | @neq
|
|---|
| 1130 | (rlwinm usign usign 0 0 0) ; hi bit
|
|---|
| 1131 | (rlwinm val val 0 0 0)
|
|---|
| 1132 | (cmpw usign val) ; is hi bit = sign, if so then done
|
|---|
| 1133 | (beq @more)
|
|---|
| 1134 | (addi len len '1) ; if not, need 1 more
|
|---|
| 1135 | (b @big)
|
|---|
| 1136 | @more
|
|---|
| 1137 | (cmpwi :cr1 fixp (target-nil-value))
|
|---|
| 1138 | (cmpwi len '1)
|
|---|
| 1139 | (beq :cr1 @big) ; dont return fixnum
|
|---|
| 1140 | (bgt @big)
|
|---|
| 1141 | ;; stuff for maybe fixnum
|
|---|
| 1142 | ;(dbg t)
|
|---|
| 1143 | (lwz val ppc32::misc-data-offset res)
|
|---|
| 1144 | (rlwinm imm4 val 0 0 2) ; hi 3 bits same? - we assume fixnumshift is 2
|
|---|
| 1145 | (srawi usign usign 2)
|
|---|
| 1146 | (cmpw usign imm4)
|
|---|
| 1147 | (bne @big)
|
|---|
| 1148 | (box-fixnum arg_z val)
|
|---|
| 1149 | (blr)
|
|---|
| 1150 | @big
|
|---|
| 1151 | (cmpw oldlen len)
|
|---|
| 1152 | (beqlr) ; same length - done
|
|---|
| 1153 | (li imm4 ppc32::subtag-bignum) ; set new length
|
|---|
| 1154 | (rlwimi imm4 len (- ppc32::num-subtag-bits ppc32::fixnumshift) 0 (- 31 ppc32::num-subtag-bits))
|
|---|
| 1155 | (stw imm4 ppc32::misc-header-offset res)
|
|---|
| 1156 | ; 0 to tail if negative
|
|---|
| 1157 | (cmpwi usign 0)
|
|---|
| 1158 | (beqlr)
|
|---|
| 1159 | ; zero from len inclusive to oldlen exclusive
|
|---|
| 1160 | ;(dbg t)
|
|---|
| 1161 | (addi idx len ppc32::misc-data-offset)
|
|---|
| 1162 | @loop2
|
|---|
| 1163 | (stwx rzero idx res)
|
|---|
| 1164 | (addi len len '1)
|
|---|
| 1165 | (cmpw len oldlen)
|
|---|
| 1166 | (addi idx idx '1)
|
|---|
| 1167 | (blt @loop2)
|
|---|
| 1168 | (blr)))
|
|---|
| 1169 |
|
|---|
| 1170 | (defppclapfunction %count-digit-leading-zeros ((high arg_y) (low arg_z))
|
|---|
| 1171 | (compose-digit imm0 high low)
|
|---|
| 1172 | (cntlzw imm0 imm0)
|
|---|
| 1173 | (box-fixnum arg_z imm0)
|
|---|
| 1174 | (blr))
|
|---|
| 1175 |
|
|---|
| 1176 | (defppclapfunction %count-digit-trailing-zeros ((high arg_y) (low arg_z))
|
|---|
| 1177 | (compose-digit imm0 high low)
|
|---|
| 1178 | (neg imm1 imm0)
|
|---|
| 1179 | (and imm0 imm0 imm1)
|
|---|
| 1180 | (cntlzw imm0 imm0)
|
|---|
| 1181 | (subfic imm0 imm0 31)
|
|---|
| 1182 | (box-fixnum arg_z imm0)
|
|---|
| 1183 | (blr))
|
|---|
| 1184 |
|
|---|
| 1185 |
|
|---|
| 1186 | (defppclapfunction %bignum-count-trailing-zero-bits ((bignum arg_z))
|
|---|
| 1187 | (let ((ndigits arg_x)
|
|---|
| 1188 | (nbits arg_y)
|
|---|
| 1189 | (digit imm0)
|
|---|
| 1190 | (ptr imm1))
|
|---|
| 1191 | (li ptr ppc32::misc-data-offset)
|
|---|
| 1192 | (li ndigits '-32)
|
|---|
| 1193 | @next
|
|---|
| 1194 | (lwzx digit bignum ptr)
|
|---|
| 1195 | (cmpwi digit 0)
|
|---|
| 1196 | (la ptr 4 ptr)
|
|---|
| 1197 | (addi ndigits ndigits '32)
|
|---|
| 1198 | (beq @next)
|
|---|
| 1199 | (neg ptr digit)
|
|---|
| 1200 | (and digit digit ptr)
|
|---|
| 1201 | (cntlzw digit digit)
|
|---|
| 1202 | (subfic digit digit 31)
|
|---|
| 1203 | (box-fixnum nbits digit)
|
|---|
| 1204 | (add arg_z nbits ndigits)
|
|---|
| 1205 | (blr)))
|
|---|
| 1206 |
|
|---|
| 1207 |
|
|---|
| 1208 | (defppclapfunction %bignum-trim-leading-zeros ((bignum arg_x)
|
|---|
| 1209 | (start arg_y)
|
|---|
| 1210 | (len arg_z))
|
|---|
| 1211 | (add imm1 start len)
|
|---|
| 1212 | (la imm1 (- ppc32::misc-data-offset 4) imm1)
|
|---|
| 1213 | @loop
|
|---|
| 1214 | (cmpwi cr0 len '1)
|
|---|
| 1215 | (lwzx imm0 bignum imm1)
|
|---|
| 1216 | (cmpwi cr1 imm0 0)
|
|---|
| 1217 | (la imm1 -4 imm1)
|
|---|
| 1218 | (bnelr cr1)
|
|---|
| 1219 | (la len '-1 len)
|
|---|
| 1220 | (bne @loop)
|
|---|
| 1221 | (blr))
|
|---|
| 1222 |
|
|---|
| 1223 | ;;; Set length of bignum to new-len (zeroing out any trailing words between
|
|---|
| 1224 | ;;; the old length and the new.
|
|---|
| 1225 | (defppclapfunction %shrink-bignum ((new-len arg_y) (bignum arg_z))
|
|---|
| 1226 | (let ((old-len temp0)
|
|---|
| 1227 | (old-idx imm0)
|
|---|
| 1228 | (new-idx imm2)
|
|---|
| 1229 | (header imm1))
|
|---|
| 1230 | (getvheader header bignum)
|
|---|
| 1231 | (header-length old-len header)
|
|---|
| 1232 | (cmpw old-len new-len)
|
|---|
| 1233 | (la old-idx ppc32::misc-data-offset old-len)
|
|---|
| 1234 | (la new-idx ppc32::misc-data-offset new-len)
|
|---|
| 1235 | (beqlr)
|
|---|
| 1236 | @loop
|
|---|
| 1237 | (subi old-idx old-idx 4)
|
|---|
| 1238 | (cmpw old-idx new-idx)
|
|---|
| 1239 | (stwx ppc32::rzero bignum old-idx)
|
|---|
| 1240 | (bne @loop)
|
|---|
| 1241 | (slwi header new-len (- ppc32::num-subtag-bits ppc32::fixnumshift))
|
|---|
| 1242 | (ori header header ppc32::subtag-bignum)
|
|---|
| 1243 | (stw header ppc32::misc-header-offset bignum)
|
|---|
| 1244 | (blr)))
|
|---|
| 1245 |
|
|---|
| 1246 | ;;; Especially when large operands are involved, the GNU Multiple Precision
|
|---|
| 1247 | ;;; library's algorithm's are often faster than Clozure CL's. GMP's MPN
|
|---|
| 1248 | ;;; library defines operations on "limb vectors", which are basically
|
|---|
| 1249 | ;;; just sequences of 32-bit digits (least-significant digit first), which
|
|---|
| 1250 | ;;; is just about exactly the same way that Clozure CL stores bignums.
|
|---|
| 1251 | ;;; We might want to (eventually) link some or all of GMP into Clozure CL;
|
|---|
| 1252 | ;;; in the meantime, it seems that we get some performance benefit from
|
|---|
| 1253 | ;;; using GMP representation and algorithms in some mixture of LAP and Lisp.
|
|---|
| 1254 | ;;; To approximate the "limb vector" representation, we copy operands to
|
|---|
| 1255 | ;;; (and results from) stack-allocated macptrs. Since those macptrs are
|
|---|
| 1256 | ;;; word-aligned, we can use fixnums to represent word-aligned pointers.
|
|---|
| 1257 | ;;; Obviously, it costs a little to copy back and forth like this; we
|
|---|
| 1258 | ;;; only win when operands are fairly large, and when we can replace an
|
|---|
| 1259 | ;;; N^2 algorithm with something cheaper.
|
|---|
| 1260 |
|
|---|
| 1261 | ;;; Macptr MUST be word-aligned (low 2 bits must be 0). Extract
|
|---|
| 1262 | ;;; such an address, return it as a fixnum.
|
|---|
| 1263 | (defppclapfunction macptr->fixnum ((ptr arg_z))
|
|---|
| 1264 | (macptr-ptr arg_z ptr)
|
|---|
| 1265 | (blr))
|
|---|
| 1266 |
|
|---|
| 1267 | ;;; Copy the limb SRC points to to where DEST points.
|
|---|
| 1268 | (defppclapfunction copy-limb ((src arg_y) (dest arg_z))
|
|---|
| 1269 | (lwz imm0 0 src)
|
|---|
| 1270 | (stw imm0 0 dest)
|
|---|
| 1271 | (blr))
|
|---|
| 1272 |
|
|---|
| 1273 | ;;; Return T iff LIMB contains 0.
|
|---|
| 1274 | (defppclapfunction limb-zerop ((limb arg_z))
|
|---|
| 1275 | (lwz imm0 0 limb)
|
|---|
| 1276 | (cntlzw imm0 imm0)
|
|---|
| 1277 | (srwi imm0 imm0 5)
|
|---|
| 1278 | (bit0->boolean arg_z imm0 imm0)
|
|---|
| 1279 | (blr))
|
|---|
| 1280 |
|
|---|
| 1281 | ;;; Return -1,0,1 according to whether the contents of Y are
|
|---|
| 1282 | ;;; <,=,> the contents of Z.
|
|---|
| 1283 | (defppclapfunction compare-limbs ((y arg_y) (z arg_z))
|
|---|
| 1284 | (lwz imm1 0 z)
|
|---|
| 1285 | (lwz imm0 0 y)
|
|---|
| 1286 | (cmplw imm0 imm1)
|
|---|
| 1287 | (li arg_z 0)
|
|---|
| 1288 | (beqlr)
|
|---|
| 1289 | (li arg_z '1)
|
|---|
| 1290 | (bgtlr)
|
|---|
| 1291 | (li arg_z '-1)
|
|---|
| 1292 | (blr))
|
|---|
| 1293 |
|
|---|
| 1294 | ;;; Add a fixnum to the limb LIMB points to. Ignore overflow.
|
|---|
| 1295 | (defppclapfunction add-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
|
|---|
| 1296 | (unbox-fixnum imm0 fixnum)
|
|---|
| 1297 | (lwz imm1 0 limb)
|
|---|
| 1298 | (add imm1 imm1 imm0)
|
|---|
| 1299 | (stw imm1 0 limb)
|
|---|
| 1300 | (blr))
|
|---|
| 1301 |
|
|---|
| 1302 | ;;; Store a fixnum value where LIMB points.
|
|---|
| 1303 | (defppclapfunction copy-fixnum-to-limb ((fixnum arg_y) (limb arg_z))
|
|---|
| 1304 | (unbox-fixnum imm0 fixnum)
|
|---|
| 1305 | (stwu imm0 0 limb)
|
|---|
| 1306 | (blr))
|
|---|
| 1307 |
|
|---|
| 1308 | ;;; Increment a "LIMB VECTOR" (bignum) by a small amount. The caller
|
|---|
| 1309 | ;;; knows that carries will only propagate for a word or two.
|
|---|
| 1310 | (defppclapfunction mpn-incr-u ((limb arg_y) (fixby arg_z))
|
|---|
| 1311 | (let ((by imm0)
|
|---|
| 1312 | (sum imm1))
|
|---|
| 1313 | (unbox-fixnum by fixby)
|
|---|
| 1314 | @loop
|
|---|
| 1315 | (lwz sum 0 limb)
|
|---|
| 1316 | (add sum sum by)
|
|---|
| 1317 | (cmplw sum by)
|
|---|
| 1318 | (stw sum 0 limb)
|
|---|
| 1319 | (li by 1)
|
|---|
| 1320 | (la limb 4 limb)
|
|---|
| 1321 | (blt @loop)
|
|---|
| 1322 | (blr)))
|
|---|
| 1323 |
|
|---|
| 1324 | ;;; Store XP-YP at WP; return carry (0 or 1).
|
|---|
| 1325 | ;;; wp, xp, yp: word-aligned, unboxed ptrs (fixnums)
|
|---|
| 1326 | ;;; size: boxed fixnum
|
|---|
| 1327 | ;;; returns boxed carry
|
|---|
| 1328 | (defppclapfunction mpn-sub-n ((wp 0) (xp arg_x) (yp arg_y) (size arg_z))
|
|---|
| 1329 | (vpop imm0)
|
|---|
| 1330 | (subi size size '1)
|
|---|
| 1331 | (cmpwi size 0)
|
|---|
| 1332 | (lwz imm3 0 xp)
|
|---|
| 1333 | (lwz imm4 0 yp)
|
|---|
| 1334 | (sub imm1 xp imm0) ; imm1 = xp-wp
|
|---|
| 1335 | (sub imm2 yp imm0) ; imm2 = yp-wp
|
|---|
| 1336 | (addi imm1 imm1 4) ; imm1 = xp-wp+4
|
|---|
| 1337 | (addi imm2 imm2 4) ; imm2 = yp-wp+4
|
|---|
| 1338 | (subfc imm3 imm4 imm3)
|
|---|
| 1339 | (stw imm3 0 imm0) ; wp[0]
|
|---|
| 1340 | (beq @done)
|
|---|
| 1341 | @top
|
|---|
| 1342 | (subi size size '1)
|
|---|
| 1343 | (cmpwi size 0)
|
|---|
| 1344 | (lwzx imm3 imm1 imm0) ; imm3 = xp[i]
|
|---|
| 1345 | (lwzx imm4 imm2 imm0) ; imm4 = xp[i]
|
|---|
| 1346 | (subfe imm3 imm4 imm3)
|
|---|
| 1347 | (stwu imm3 4 imm0)
|
|---|
| 1348 | (bne @top)
|
|---|
| 1349 | @done
|
|---|
| 1350 | (subfe imm0 rzero rzero)
|
|---|
| 1351 | (subfic imm0 imm0 0)
|
|---|
| 1352 | (box-fixnum arg_z imm0)
|
|---|
| 1353 | (blr))
|
|---|
| 1354 |
|
|---|
| 1355 | ;;; Store XP+YP at WP; return carry (0 or 1).
|
|---|
| 1356 | ;;; wp, xp, yp = word-aligned, unboxed macptrs (fixnums).
|
|---|
| 1357 | ;;; size = boxed fixnum
|
|---|
| 1358 | ;;; result = boxed carry
|
|---|
| 1359 | (defppclapfunction mpn-add-n ((wp 0) (xp arg_x) (yp arg_y) (size arg_z))
|
|---|
| 1360 | (vpop imm0)
|
|---|
| 1361 | (subi size size '1)
|
|---|
| 1362 | (cmpwi size 0)
|
|---|
| 1363 | (lwz imm3 0 xp)
|
|---|
| 1364 | (lwz imm4 0 yp)
|
|---|
| 1365 | (sub imm1 xp imm0) ; imm1 = xp-wp
|
|---|
| 1366 | (sub imm2 yp imm0) ; imm2 = yp-wp
|
|---|
| 1367 | (addi imm1 imm1 4) ; imm1 = xp-wp+4
|
|---|
| 1368 | (addi imm2 imm2 4) ; imm2 = yp-wp+4
|
|---|
| 1369 | (addc imm3 imm3 imm4)
|
|---|
| 1370 | (stw imm3 0 imm0) ; wp[0]
|
|---|
| 1371 | (beq @done)
|
|---|
| 1372 | @top
|
|---|
| 1373 | (subi size size '1)
|
|---|
| 1374 | (cmpwi size 0)
|
|---|
| 1375 | (lwzx imm3 imm1 imm0) ; imm3 = xp[i]
|
|---|
| 1376 | (lwzx imm4 imm2 imm0) ; imm4 = xp[i]
|
|---|
| 1377 | (adde imm3 imm4 imm3)
|
|---|
| 1378 | (stwu imm3 4 imm0)
|
|---|
| 1379 | (bne @top)
|
|---|
| 1380 | @done
|
|---|
| 1381 | (addze imm0 rzero)
|
|---|
| 1382 | (box-fixnum arg_z imm0)
|
|---|
| 1383 | (blr))
|
|---|
| 1384 |
|
|---|
| 1385 | ;;; Add the single limb LIMB to S1P (propagating carry.) Store the
|
|---|
| 1386 | ;;; result at RP. RP and S1P may be the same place, so check for
|
|---|
| 1387 | ;;; that and do nothing after carry stops propagating. Return carry.
|
|---|
| 1388 | (defppclapfunction mpn-add-1 ((rp-offset 0) (s1p arg_x) (size arg_y) (limb arg_z))
|
|---|
| 1389 | (let ((rp temp0))
|
|---|
| 1390 | (vpop rp)
|
|---|
| 1391 | (subi size size '1)
|
|---|
| 1392 | (cmpwi cr2 size 0)
|
|---|
| 1393 | (cmpw cr1 rp s1p) ;a common case
|
|---|
| 1394 | (subi rp rp 4)
|
|---|
| 1395 | (subi s1p s1p 4)
|
|---|
| 1396 | (lwz imm0 0 limb)
|
|---|
| 1397 | (lwzu imm1 4 s1p)
|
|---|
| 1398 | (addc imm1 imm1 imm0)
|
|---|
| 1399 | (addze. imm0 rzero)
|
|---|
| 1400 | (stwu imm1 4 rp)
|
|---|
| 1401 | (beq cr2 @done)
|
|---|
| 1402 | @top
|
|---|
| 1403 | (beq cr0 @finish) ; branch if no more carry
|
|---|
| 1404 | (subi size size '1)
|
|---|
| 1405 | (cmpwi cr2 size 0)
|
|---|
| 1406 | (lwzu imm1 4 s1p)
|
|---|
| 1407 | (addc imm1 imm1 imm0)
|
|---|
| 1408 | (addze. imm0 rzero)
|
|---|
| 1409 | (stwu imm1 4 rp)
|
|---|
| 1410 | (bne cr2 @top)
|
|---|
| 1411 | (box-fixnum arg_z imm0)
|
|---|
| 1412 | (blr)
|
|---|
| 1413 | @finish
|
|---|
| 1414 | (beq cr1 @done)
|
|---|
| 1415 | @loop
|
|---|
| 1416 | (subi size size '1)
|
|---|
| 1417 | (cmpwi cr2 size 0)
|
|---|
| 1418 | (lwzu imm1 4 s1p)
|
|---|
| 1419 | (stwu imm1 4 rp)
|
|---|
| 1420 | (bne cr2 @loop)
|
|---|
| 1421 | @done
|
|---|
| 1422 | (box-fixnum arg_z imm0)
|
|---|
| 1423 | (blr)))
|
|---|
| 1424 | ;;; Multiply the limb vector S1 by the single limb at LIMBPTR, storing
|
|---|
| 1425 | ;;; the result at RES. Store the "carry out" (high word of last 64-bit
|
|---|
| 1426 | ;;; partial product) at the limb RESULT.
|
|---|
| 1427 | ;;; res, s1, limbptr, result:
|
|---|
| 1428 | ;;; unboxed, word-aligned ptrs (fixnums). size: boxed fixnum
|
|---|
| 1429 | ;;; It'd be hard to transliterate the GMP code here; the GMP version
|
|---|
| 1430 | ;;; uses lots more immediate registers than we can easily use in LAP
|
|---|
| 1431 | ;;; (and is much more aggressively pipelined).
|
|---|
| 1432 | (defppclapfunction mpn-mul-1 ((res-offset 4)
|
|---|
| 1433 | (s1-offset 0)
|
|---|
| 1434 | (size arg_x)
|
|---|
| 1435 | (limbptr arg_y)
|
|---|
| 1436 | (result arg_z))
|
|---|
| 1437 | (let ((limb imm0)
|
|---|
| 1438 | (resptr temp0)
|
|---|
| 1439 | (s1 temp1)
|
|---|
| 1440 | (src imm1)
|
|---|
| 1441 | (prod-low imm2)
|
|---|
| 1442 | (prod-high imm3)
|
|---|
| 1443 | (carry imm4))
|
|---|
| 1444 | (lwz resptr res-offset vsp)
|
|---|
| 1445 | (lwz s1 s1-offset vsp)
|
|---|
| 1446 | (la vsp 8 vsp)
|
|---|
| 1447 | (la resptr -4 resptr) ; pre-decrement
|
|---|
| 1448 | (la s1 -4 s1)
|
|---|
| 1449 | (addic carry carry 0)
|
|---|
| 1450 | (li carry 0)
|
|---|
| 1451 | (lwz limb 0 limbptr)
|
|---|
| 1452 | @loop
|
|---|
| 1453 | (subi size size '1)
|
|---|
| 1454 | (cmpwi size 0)
|
|---|
| 1455 | (lwzu src 4 s1)
|
|---|
| 1456 | (mulhwu prod-high src limb)
|
|---|
| 1457 | (mullw prod-low src limb)
|
|---|
| 1458 | (addc prod-low prod-low carry)
|
|---|
| 1459 | (addze carry prod-high)
|
|---|
| 1460 | (stwu prod-low 4 resptr)
|
|---|
| 1461 | (bne @loop)
|
|---|
| 1462 | (stw carry 0 result)
|
|---|
| 1463 | (blr)))
|
|---|
| 1464 |
|
|---|
| 1465 | ;;; multiply s1*limb and add result to res
|
|---|
| 1466 | ;;; res, s1, limbptr, result:
|
|---|
| 1467 | ;;; unboxed, word-aligned ptrs (fixnums).
|
|---|
| 1468 | ;;; size: boxed fixnum
|
|---|
| 1469 | ;;; limbptr: source "limb".
|
|---|
| 1470 | ;;; result: carry out (high word of product).
|
|---|
| 1471 | (defppclapfunction mpn-addmul-1 ((res-offset 4)
|
|---|
| 1472 | (s1-offset 0)
|
|---|
| 1473 | (size arg_x)
|
|---|
| 1474 | (limbptr arg_y)
|
|---|
| 1475 | (result arg_z))
|
|---|
| 1476 | (let ((limb imm0)
|
|---|
| 1477 | (resptr temp0)
|
|---|
| 1478 | (s1 temp1)
|
|---|
| 1479 | (src imm1)
|
|---|
| 1480 | (prod-low imm2)
|
|---|
| 1481 | (prod-high imm3)
|
|---|
| 1482 | (carry imm4)
|
|---|
| 1483 | (prev imm4))
|
|---|
| 1484 | (lwz resptr res-offset vsp)
|
|---|
| 1485 | (lwz s1 s1-offset vsp)
|
|---|
| 1486 | (la vsp 8 vsp)
|
|---|
| 1487 | (la resptr -4 resptr) ; pre-decrement
|
|---|
| 1488 | (la s1 -4 s1)
|
|---|
| 1489 | (addic carry carry 0)
|
|---|
| 1490 | (li carry 0)
|
|---|
| 1491 | (lwz limb 0 limbptr)
|
|---|
| 1492 | @loop
|
|---|
| 1493 | (subi size size '1)
|
|---|
| 1494 | (cmpwi size 0)
|
|---|
| 1495 | (lwzu src 4 s1)
|
|---|
| 1496 | (mulhwu prod-high src limb)
|
|---|
| 1497 | (mullw prod-low src limb)
|
|---|
| 1498 | (addc prod-low prod-low carry)
|
|---|
| 1499 | (addze prod-high prod-high)
|
|---|
| 1500 | (lwz prev 4 resptr)
|
|---|
| 1501 | (addc prev prev prod-low)
|
|---|
| 1502 | (stwu prev 4 resptr)
|
|---|
| 1503 | (addze carry prod-high)
|
|---|
| 1504 | (bne @loop)
|
|---|
| 1505 | (stw carry 0 result)
|
|---|
| 1506 | (blr)))
|
|---|
| 1507 |
|
|---|
| 1508 | ;;; Multiply the UN-word limb vector at UP and the VN-word limb vector
|
|---|
| 1509 | ;;; at VP, store the result at RP.
|
|---|
| 1510 | (defppclapfunction mpn-mul-basecase ((rp-offset 4)
|
|---|
| 1511 | (up-offset 0)
|
|---|
| 1512 | (un arg_x)
|
|---|
| 1513 | (vp arg_y)
|
|---|
| 1514 | (vn arg_z))
|
|---|
| 1515 | (let ((resptr temp0)
|
|---|
| 1516 | (s1 temp1)
|
|---|
| 1517 | (up temp2)
|
|---|
| 1518 | (rp temp3)
|
|---|
| 1519 | (size nargs)
|
|---|
| 1520 | (limb imm0)
|
|---|
| 1521 | (src imm1)
|
|---|
| 1522 | (prod-low imm2)
|
|---|
| 1523 | (prod-high imm3)
|
|---|
| 1524 | (prev imm4)
|
|---|
| 1525 | (carry imm4))
|
|---|
| 1526 | (lwz resptr rp-offset vsp)
|
|---|
| 1527 | (la rp -4 resptr)
|
|---|
| 1528 | (lwz up up-offset vsp)
|
|---|
| 1529 | (la s1 -4 up)
|
|---|
| 1530 | (la vsp 8 vsp)
|
|---|
| 1531 | (mr size un)
|
|---|
| 1532 | (lwz limb 0 vp)
|
|---|
| 1533 | (subi vn vn '1)
|
|---|
| 1534 | (cmpwi cr2 vn 0)
|
|---|
| 1535 | (li carry 0)
|
|---|
| 1536 | @mul-1-loop
|
|---|
| 1537 | (subi size size '1)
|
|---|
| 1538 | (cmpwi size 0)
|
|---|
| 1539 | (lwzu src 4 s1)
|
|---|
| 1540 | (mulhwu prod-high src limb)
|
|---|
| 1541 | (mullw prod-low src limb)
|
|---|
| 1542 | (addc prod-low prod-low carry)
|
|---|
| 1543 | (addze carry prod-high)
|
|---|
| 1544 | (stwu prod-low 4 rp)
|
|---|
| 1545 | (bne @mul-1-loop)
|
|---|
| 1546 | (stw carry 4 rp)
|
|---|
| 1547 | @again
|
|---|
| 1548 | (beq cr2 @done)
|
|---|
| 1549 | (subi vn vn '1)
|
|---|
| 1550 | (cmpwi cr2 vn 0)
|
|---|
| 1551 | (mr rp resptr)
|
|---|
| 1552 | (la resptr 4 resptr)
|
|---|
| 1553 | (la s1 -4 up)
|
|---|
| 1554 | (lwzu limb 4 vp)
|
|---|
| 1555 | (mr size un)
|
|---|
| 1556 | (addic carry carry 0)
|
|---|
| 1557 | (li carry 0)
|
|---|
| 1558 | @addmul-1-loop
|
|---|
| 1559 | (subi size size '1)
|
|---|
| 1560 | (cmpwi size 0)
|
|---|
| 1561 | (lwzu src 4 s1)
|
|---|
| 1562 | (mulhwu prod-high src limb)
|
|---|
| 1563 | (mullw prod-low src limb)
|
|---|
| 1564 | (addc prod-low prod-low carry)
|
|---|
| 1565 | (addze prod-high prod-high)
|
|---|
| 1566 | (lwz prev 4 rp)
|
|---|
| 1567 | (addc prev prev prod-low)
|
|---|
| 1568 | (stwu prev 4 rp)
|
|---|
| 1569 | (addze carry prod-high)
|
|---|
| 1570 | (bne @addmul-1-loop)
|
|---|
| 1571 | (stw carry 4 rp)
|
|---|
| 1572 | (b @again)
|
|---|
| 1573 | @done
|
|---|
| 1574 | (li arg_z (target-nil-value))
|
|---|
| 1575 | (blr)))
|
|---|
| 1576 |
|
|---|
| 1577 | ;;; left-shift src by 1 bit, storing result at res. Return
|
|---|
| 1578 | ;;; the bit that was shifted out.
|
|---|
| 1579 | (defppclapfunction mpn-lshift-1 ((resptr arg_x) (s1ptr arg_y) (size-arg arg_z))
|
|---|
| 1580 | (let ((size temp0)
|
|---|
| 1581 | (last-bit imm0)
|
|---|
| 1582 | (prev imm1)
|
|---|
| 1583 | (curr imm2)
|
|---|
| 1584 | (sleft imm3)
|
|---|
| 1585 | (sright imm4))
|
|---|
| 1586 | (subi size size-arg '1)
|
|---|
| 1587 | (cmpwi size 0)
|
|---|
| 1588 | (add resptr resptr size-arg)
|
|---|
| 1589 | (add s1ptr s1ptr size-arg)
|
|---|
| 1590 | (lwzu prev -4 s1ptr)
|
|---|
| 1591 | (srwi last-bit prev 31)
|
|---|
| 1592 | (box-fixnum arg_z last-bit)
|
|---|
| 1593 | (beq @end1)
|
|---|
| 1594 | @loop
|
|---|
| 1595 | (subi size size '1)
|
|---|
| 1596 | (cmpwi size 0)
|
|---|
| 1597 | (lwzu curr -4 s1ptr)
|
|---|
| 1598 | (slwi sleft prev 1)
|
|---|
| 1599 | (srwi sright curr 31)
|
|---|
| 1600 | (or sright sright sleft)
|
|---|
| 1601 | (stwu sright -4 resptr)
|
|---|
| 1602 | (beq @end2)
|
|---|
| 1603 | (subi size size '1)
|
|---|
| 1604 | (cmpwi size 0)
|
|---|
| 1605 | (lwzu prev -4 s1ptr)
|
|---|
| 1606 | (slwi sleft curr 1)
|
|---|
| 1607 | (srwi sright prev 31)
|
|---|
| 1608 | (or sright sright sleft)
|
|---|
| 1609 | (stwu sright -4 resptr)
|
|---|
| 1610 | (bne @loop)
|
|---|
| 1611 | @end1
|
|---|
| 1612 | (slwi sleft prev 1)
|
|---|
| 1613 | (stwu sleft -4 resptr)
|
|---|
| 1614 | (blr)
|
|---|
| 1615 | @end2
|
|---|
| 1616 | (slwi sleft curr 1)
|
|---|
| 1617 | (stwu sleft -4 resptr)
|
|---|
| 1618 | (blr)))
|
|---|
| 1619 |
|
|---|
| 1620 | ;;; Do a 32x32=64 unsigned multiply of the words at X and Y. Store
|
|---|
| 1621 | ;;; result (low word first) at RESULT.
|
|---|
| 1622 | (defppclapfunction umulppm ((x arg_x) (y arg_y) (result arg_z))
|
|---|
| 1623 | (lwz imm0 0 x)
|
|---|
| 1624 | (lwz imm1 0 y)
|
|---|
| 1625 | (mullw imm2 imm0 imm1)
|
|---|
| 1626 | (mulhwu imm3 imm0 imm1)
|
|---|
| 1627 | (stw imm2 0 result)
|
|---|
| 1628 | (stw imm3 4 result)
|
|---|
| 1629 | (blr))
|
|---|
| 1630 |
|
|---|
| 1631 |
|
|---|
| 1632 | ;;; for truncate-by-fixnum etal
|
|---|
| 1633 | ;;; doesnt store quotient - just returns rem in 2 halves
|
|---|
| 1634 | (defppclapfunction %floor-loop-no-quo ((q arg_x)(yhi arg_y)(ylo arg_z))
|
|---|
| 1635 | (let ((a imm1)
|
|---|
| 1636 | (b imm2)
|
|---|
| 1637 | (y imm3)
|
|---|
| 1638 | (quo imm0)
|
|---|
| 1639 | (qidx temp0)
|
|---|
| 1640 | (qlen temp1))
|
|---|
| 1641 | (lwz imm4 (- ppc32::fulltag-misc) q)
|
|---|
| 1642 | (header-length qlen imm4)
|
|---|
| 1643 | (subi qidx qlen 4)
|
|---|
| 1644 | (mr b rzero)
|
|---|
| 1645 | (compose-digit y yhi ylo)
|
|---|
| 1646 | @loop
|
|---|
| 1647 | (rlwinm a b -16 16 31)
|
|---|
| 1648 | (rlwinm b b 16 0 15)
|
|---|
| 1649 | (la imm4 ppc32::misc-data-offset q)
|
|---|
| 1650 | (lwzx imm4 qidx imm4) ; q contents
|
|---|
| 1651 | (rlwimi b imm4 16 16 31) ; hi 16 to lo b
|
|---|
| 1652 | ;(dbg)
|
|---|
| 1653 | (48x32-divide a b y fp0 fp1 fp2 imm4)
|
|---|
| 1654 | (fctiwz fp0 fp0)
|
|---|
| 1655 | (stwu tsp -32 tsp)
|
|---|
| 1656 | (stw tsp 4 tsp)
|
|---|
| 1657 | (stfd fp0 24 tsp)
|
|---|
| 1658 | (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
|
|---|
| 1659 | ; now mul quo by y
|
|---|
| 1660 | (mullw imm4 y quo)
|
|---|
| 1661 | ; and subtract from a,b
|
|---|
| 1662 | (subfc b imm4 b)
|
|---|
| 1663 | ; new a and b are low 2 digits of this (b) and last digit in array
|
|---|
| 1664 | ; and do it again on low 3 digits
|
|---|
| 1665 | ;(dbg)
|
|---|
| 1666 | (rlwinm a b -16 16 31)
|
|---|
| 1667 | (rlwinm b b 16 0 15)
|
|---|
| 1668 | (la imm4 ppc32::misc-data-offset q)
|
|---|
| 1669 | (lwzx imm4 qidx imm4)
|
|---|
| 1670 | (rlwimi b imm4 0 16 31)
|
|---|
| 1671 | (48x32-divide a b y fp0 fp1 fp2 imm4)
|
|---|
| 1672 | (fctiwz fp0 fp0)
|
|---|
| 1673 | (stfd fp0 16 tsp) ; quo lo
|
|---|
| 1674 | (subi qidx qidx 4)
|
|---|
| 1675 | (cmpwi :cr1 qidx 0)
|
|---|
| 1676 | (lwz quo (+ 16 4) tsp)
|
|---|
| 1677 | (lwz tsp 0 tsp)
|
|---|
| 1678 | (mullw imm4 y quo)
|
|---|
| 1679 | (subfc b imm4 b) ; b is remainder
|
|---|
| 1680 | (bge :cr1 @loop)
|
|---|
| 1681 | (digit-h temp0 b)
|
|---|
| 1682 | (vpush temp0)
|
|---|
| 1683 | (digit-l temp0 b)
|
|---|
| 1684 | (vpush temp0)
|
|---|
| 1685 | (la temp0 8 vsp)
|
|---|
| 1686 | (set-nargs 2)
|
|---|
| 1687 | (ba .SPvalues)))
|
|---|
| 1688 |
|
|---|
| 1689 |
|
|---|
| 1690 | ; store result in dest, return rem in 2 halves
|
|---|
| 1691 | (defppclapfunction %floor-loop-quo ((q-stk 0)(dest arg_x)(yhi arg_y)(ylo arg_z))
|
|---|
| 1692 | (let ((a imm1)
|
|---|
| 1693 | (b imm2)
|
|---|
| 1694 | (y imm3)
|
|---|
| 1695 | (quo imm0)
|
|---|
| 1696 | (qidx temp0)
|
|---|
| 1697 | (qlen temp1)
|
|---|
| 1698 | (q temp2))
|
|---|
| 1699 | (vpop q)
|
|---|
| 1700 | (lwz imm4 (- ppc32::fulltag-misc) q)
|
|---|
| 1701 | (header-length qlen imm4)
|
|---|
| 1702 | (subi qidx qlen 4)
|
|---|
| 1703 | (mr b rzero)
|
|---|
| 1704 | (compose-digit y yhi ylo)
|
|---|
| 1705 | @loop
|
|---|
| 1706 | (rlwinm a b -16 16 31)
|
|---|
| 1707 | (rlwinm b b 16 0 15)
|
|---|
| 1708 | (la imm4 ppc32::misc-data-offset q)
|
|---|
| 1709 | (lwzx imm4 qidx imm4) ; q contents
|
|---|
| 1710 | (rlwimi b imm4 16 16 31) ; hi 16 to lo b
|
|---|
| 1711 | (48x32-divide a b y fp0 fp1 fp2 imm4)
|
|---|
| 1712 | (fctiwz fp0 fp0)
|
|---|
| 1713 | (stwu tsp -32 tsp)
|
|---|
| 1714 | (stw tsp 4 tsp)
|
|---|
| 1715 | (stfd fp0 24 tsp)
|
|---|
| 1716 | (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
|
|---|
| 1717 | ; now mul quo by y
|
|---|
| 1718 | (mullw imm4 y quo)
|
|---|
| 1719 | ; and subtract from a,b
|
|---|
| 1720 | (subfc b imm4 b)
|
|---|
| 1721 | ; new a and b are low 2 digits of this (b) and last digit in array
|
|---|
| 1722 | ; and do it again on low 3 digits
|
|---|
| 1723 | ;(dbg)
|
|---|
| 1724 | (rlwinm a b -16 16 31)
|
|---|
| 1725 | (rlwinm b b 16 0 15)
|
|---|
| 1726 | (la imm4 ppc32::misc-data-offset q)
|
|---|
| 1727 | (lwzx imm4 qidx imm4)
|
|---|
| 1728 | (rlwimi b imm4 0 16 31)
|
|---|
| 1729 | (48x32-divide a b y fp0 fp1 fp2 imm4)
|
|---|
| 1730 | (fctiwz fp0 fp0)
|
|---|
| 1731 | (stfd fp0 16 tsp) ; quo lo
|
|---|
| 1732 | (lwz quo (+ 16 4) tsp)
|
|---|
| 1733 | (mullw imm4 y quo)
|
|---|
| 1734 | (subfc b imm4 b) ; b is remainder
|
|---|
| 1735 | (lwz quo (+ 24 4) tsp) ; quo-hi
|
|---|
| 1736 | (rlwinm quo quo 16 0 15)
|
|---|
| 1737 | (lwz imm4 (+ 16 4) tsp) ; quo lo
|
|---|
| 1738 | (lwz tsp 0 tsp)
|
|---|
| 1739 | (rlwimi quo imm4 0 16 31)
|
|---|
| 1740 | (la imm4 ppc32::misc-data-offset dest)
|
|---|
| 1741 | (stwx quo qidx imm4)
|
|---|
| 1742 | (subic. qidx qidx 4)
|
|---|
| 1743 | (bge @loop)
|
|---|
| 1744 | (digit-h temp0 b)
|
|---|
| 1745 | (vpush temp0)
|
|---|
| 1746 | (digit-l temp0 b)
|
|---|
| 1747 | (vpush temp0)
|
|---|
| 1748 | (la temp0 8 vsp)
|
|---|
| 1749 | (set-nargs 2)
|
|---|
| 1750 | (ba .SPvalues)))
|
|---|
| 1751 |
|
|---|
| 1752 | ;;; get xidx thing from x, yidx thing from y if same return #xffff
|
|---|
| 1753 | ;;; #xffff otherwise get another thing from x and 1- xidx and do as
|
|---|
| 1754 | ;;; %floor of xthing otherx ything
|
|---|
| 1755 | ;;; Huh?
|
|---|
| 1756 | (defppclapfunction %floor-99 ((x-stk 0)(xidx arg_x)(yptr arg_y)(yidx arg_z))
|
|---|
| 1757 | (let ((xptr temp0)
|
|---|
| 1758 | (a imm1)
|
|---|
| 1759 | (b imm2)
|
|---|
| 1760 | (y imm3)
|
|---|
| 1761 | (quo imm0))
|
|---|
| 1762 | (vpop xptr)
|
|---|
| 1763 | (la imm4 ppc32::misc-data-offset XIDX)
|
|---|
| 1764 | (lwzx a xptr imm4)
|
|---|
| 1765 | (la imm4 ppc32::misc-data-offset YIDX)
|
|---|
| 1766 | (lwzx y yptr imm4)
|
|---|
| 1767 | (cmpw a y)
|
|---|
| 1768 | (bne @more)
|
|---|
| 1769 | (li imm4 #xffff)
|
|---|
| 1770 | (rlwinm imm4 imm4 ppc32::fixnumshift (- 16 ppc32::fixnumshift) (- 31 ppc32::fixnum-shift))
|
|---|
| 1771 | (vpush imm4)
|
|---|
| 1772 | (vpush imm4)
|
|---|
| 1773 | (la temp0 8 vsp)
|
|---|
| 1774 | (set-nargs 2)
|
|---|
| 1775 | (ba .spvalues)
|
|---|
| 1776 | @MORE
|
|---|
| 1777 | ; a has 16 bits from ahi, bhi gets alo blo gets bhi
|
|---|
| 1778 | (la imm4 (- ppc32::misc-data-offset 4) xidx)
|
|---|
| 1779 | (lwzx b xptr imm4)
|
|---|
| 1780 | (rlwinm b b 16 16 31) ; bhi to blo
|
|---|
| 1781 | (rlwimi b a 16 0 15) ; alo to bhi
|
|---|
| 1782 | (rlwinm a a 16 16 31) ; a gets alo
|
|---|
| 1783 | (48x32-divide a b y fp0 fp1 fp2 imm4)
|
|---|
| 1784 | (fctiwz fp0 fp0)
|
|---|
| 1785 | (stwu tsp -32 tsp)
|
|---|
| 1786 | (stw tsp 4 tsp)
|
|---|
| 1787 | (stfd fp0 24 tsp)
|
|---|
| 1788 | (lwz quo (+ 24 4) tsp) ; 16 quo bits above stuff used by 48x32
|
|---|
| 1789 | ; now mul quo by y
|
|---|
| 1790 | (mullw imm4 y quo)
|
|---|
| 1791 | ; and subtract from a,b
|
|---|
| 1792 | (subfc b imm4 b)
|
|---|
| 1793 | ; AND AGAIN
|
|---|
| 1794 | (rlwinm a b -16 16 31) ; a gets b hi
|
|---|
| 1795 | (rlwinm b b 16 0 15) ; b lo to b hi
|
|---|
| 1796 | (la imm4 (- ppc32::misc-data-offset 4) xidx)
|
|---|
| 1797 | (lwzx imm4 imm4 xptr)
|
|---|
| 1798 | (rlwimi b imm4 0 16 31)
|
|---|
| 1799 | (48x32-divide a b y fp0 fp1 fp2 imm4)
|
|---|
| 1800 | (fctiwz fp0 fp0)
|
|---|
| 1801 | (stfd fp0 16 tsp) ; quo lo
|
|---|
| 1802 | (lwz quo (+ 24 4) tsp) ; quo-hi
|
|---|
| 1803 | (box-fixnum temp0 quo)
|
|---|
| 1804 | (vpush temp0)
|
|---|
| 1805 | (lwz quo (+ 16 4) tsp) ; quo lo
|
|---|
| 1806 | (lwz tsp 0 tsp)
|
|---|
| 1807 | (box-fixnum temp0 quo)
|
|---|
| 1808 | (vpush temp0)
|
|---|
| 1809 | (la temp0 8 vsp)
|
|---|
| 1810 | (set-nargs 2)
|
|---|
| 1811 | (ba .SPvalues)))
|
|---|
| 1812 |
|
|---|
| 1813 | ; End of ppc32-bignum.lisp
|
|---|