| 1 | ;;-*- Mode: Lisp; Package: CCL -*-
|
|---|
| 2 | ;;;
|
|---|
| 3 | ;;; Copyright 2006-2009 Clozure Associates
|
|---|
| 4 | ;;;
|
|---|
| 5 | ;;; Licensed under the Apache License, Version 2.0 (the "License");
|
|---|
| 6 | ;;; you may not use this file except in compliance with the License.
|
|---|
| 7 | ;;; You may obtain a copy of the License at
|
|---|
| 8 | ;;;
|
|---|
| 9 | ;;; http://www.apache.org/licenses/LICENSE-2.0
|
|---|
| 10 | ;;;
|
|---|
| 11 | ;;; Unless required by applicable law or agreed to in writing, software
|
|---|
| 12 | ;;; distributed under the License is distributed on an "AS IS" BASIS,
|
|---|
| 13 | ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|---|
| 14 | ;;; See the License for the specific language governing permissions and
|
|---|
| 15 | ;;; limitations under the License.
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 | (in-package "CCL")
|
|---|
| 19 |
|
|---|
| 20 | ;;; The caller has allocated a two-digit bignum (quite likely on the stack).
|
|---|
| 21 | ;;; If we can fit in a single digit (if the high word is just a sign
|
|---|
| 22 | ;;; extension of the low word), truncate the bignum in place (the
|
|---|
| 23 | ;;; trailing words should already be zeroed.
|
|---|
| 24 | (defx86lapfunction %fixnum-to-bignum-set ((bignum arg_y) (fixnum arg_z))
|
|---|
| 25 | (movq (% fixnum) (% arg_x))
|
|---|
| 26 | (shl ($ (- 32 x8664::fixnumshift)) (% arg_x))
|
|---|
| 27 | (sar ($ (- 32 x8664::fixnumshift)) (% arg_x))
|
|---|
| 28 | (unbox-fixnum fixnum imm0)
|
|---|
| 29 | (cmp (% arg_x) (% fixnum))
|
|---|
| 30 | (je @chop)
|
|---|
| 31 | (movq (% imm0) (@ x8664::misc-data-offset (% bignum)))
|
|---|
| 32 | (single-value-return)
|
|---|
| 33 | @chop
|
|---|
| 34 | (movq ($ x8664::one-digit-bignum-header) (@ x8664::misc-header-offset (% bignum)))
|
|---|
| 35 | (movl (% imm0.l) (@ x8664::misc-data-offset (% bignum)))
|
|---|
| 36 | (single-value-return))
|
|---|
| 37 |
|
|---|
| 38 |
|
|---|
| 39 |
|
|---|
| 40 | (defx86lapfunction %multiply-and-add-loop64
|
|---|
| 41 | ((xs 16) (ys 8) #|(ra 0)|# (r arg_x) (i arg_y) (ylen arg_z))
|
|---|
| 42 | (let ((y temp2)
|
|---|
| 43 | (j temp0)
|
|---|
| 44 | (c imm2))
|
|---|
| 45 | (movq (@ xs (% rsp)) (% temp0))
|
|---|
| 46 | (movq (@ x8664::misc-data-offset (% temp0) (% i)) (% mm0)) ;x[i]
|
|---|
| 47 | (movq (@ ys (% rsp)) (% y))
|
|---|
| 48 | (xorl (%l j) (%l j))
|
|---|
| 49 | (xorl (%l c) (%l c))
|
|---|
| 50 | @loop
|
|---|
| 51 | ;; It's a pity to have to reload this every time, but there's no
|
|---|
| 52 | ;; imm3. (Give him 16 registers, and he still complains...)
|
|---|
| 53 | (movd (% mm0) (% rax))
|
|---|
| 54 | (mulq (@ x8664::misc-data-offset (% y) (% j))) ;128-bit x * y[j] in rdx:rax
|
|---|
| 55 | (addq (@ x8664::misc-data-offset (% r) (% i)) (% rax)) ;add in r[i]
|
|---|
| 56 | (adcq ($ 0) (% rdx))
|
|---|
| 57 | ;; add in carry digit
|
|---|
| 58 | (addq (% c) (% rax))
|
|---|
| 59 | (movl ($ 0) (%l c))
|
|---|
| 60 | (adcq (% rdx) (% c)) ;new carry digit
|
|---|
| 61 | (movq (% rax) (@ x8664::misc-data-offset (% r) (% i))) ;update r[i]
|
|---|
| 62 | (addq ($ '1) (% i))
|
|---|
| 63 | (addq ($ '1) (% j))
|
|---|
| 64 | (subq ($ '1) (% ylen))
|
|---|
| 65 | (ja @loop)
|
|---|
| 66 | (movq (% c) (@ x8664::misc-data-offset (% r) (% i)))
|
|---|
| 67 | (single-value-return 4)))
|
|---|
| 68 |
|
|---|
| 69 | ;;; Multiply the (32-bit) digits X and Y, producing a 64-bit result.
|
|---|
| 70 | ;;; Add the 32-bit "prev" digit and the 32-bit carry-in digit to that 64-bit
|
|---|
| 71 | ;;; result; return the halves as (VALUES high low).
|
|---|
| 72 | (defx86lapfunction %multiply-and-add4 ((x 8) #|(ra 0)|# (y arg_x) (prev arg_y) (carry-in arg_z))
|
|---|
| 73 | (let ((unboxed-x imm0)
|
|---|
| 74 | (unboxed-y imm1)
|
|---|
| 75 | (unboxed-prev imm0)
|
|---|
| 76 | (unboxed-carry-in imm0)
|
|---|
| 77 | (unboxed-low imm0)
|
|---|
| 78 | (high arg_y)
|
|---|
| 79 | (low arg_z))
|
|---|
| 80 | (pop (% ra0))
|
|---|
| 81 | (popq (% temp0))
|
|---|
| 82 | (discard-reserved-frame)
|
|---|
| 83 | (push (% ra0))
|
|---|
| 84 | (unbox-fixnum temp0 unboxed-x)
|
|---|
| 85 | (unbox-fixnum y unboxed-y)
|
|---|
| 86 | (mull (%l unboxed-y))
|
|---|
| 87 | (shlq ($ 32) (% unboxed-y))
|
|---|
| 88 | (orq (% unboxed-x) (% unboxed-y)) ; I got yer 64-bit product right here
|
|---|
| 89 | (unbox-fixnum prev unboxed-prev)
|
|---|
| 90 | (addq (% unboxed-prev) (% unboxed-y))
|
|---|
| 91 | (unbox-fixnum carry-in unboxed-carry-in)
|
|---|
| 92 | (addq (% unboxed-carry-in) (% unboxed-y))
|
|---|
| 93 | (movl (%l unboxed-y) (%l unboxed-low))
|
|---|
| 94 | (box-fixnum unboxed-low low)
|
|---|
| 95 | (shr ($ 32) (% unboxed-y))
|
|---|
| 96 | (box-fixnum unboxed-y high)
|
|---|
| 97 | (movq (% rsp) (% temp0))
|
|---|
| 98 | (pushq (% high))
|
|---|
| 99 | (pushq (% low))
|
|---|
| 100 | (set-nargs 2)
|
|---|
| 101 | (jmp-subprim .SPvalues)))
|
|---|
| 102 |
|
|---|
| 103 | (defx86lapfunction %multiply-and-add3 ((x arg_x) (y arg_y) (carry-in arg_z))
|
|---|
| 104 | (let ((unboxed-x imm0)
|
|---|
| 105 | (unboxed-y imm1)
|
|---|
| 106 | (unboxed-carry-in imm0)
|
|---|
| 107 | (unboxed-low imm0)
|
|---|
| 108 | (high arg_y)
|
|---|
| 109 | (low arg_z))
|
|---|
| 110 | (unbox-fixnum arg_x unboxed-x)
|
|---|
| 111 | (unbox-fixnum y unboxed-y)
|
|---|
| 112 | (mull (%l unboxed-y))
|
|---|
| 113 | (shlq ($ 32) (% unboxed-y))
|
|---|
| 114 | (orq (% unboxed-x) (% unboxed-y))
|
|---|
| 115 | (unbox-fixnum carry-in unboxed-carry-in)
|
|---|
| 116 | (addq (% unboxed-carry-in) (% unboxed-y))
|
|---|
| 117 | (movl (%l unboxed-y) (%l unboxed-low))
|
|---|
| 118 | (box-fixnum unboxed-low low)
|
|---|
| 119 | (shr ($ 32) (% unboxed-y))
|
|---|
| 120 | (box-fixnum unboxed-y high)
|
|---|
| 121 | (movq (% rsp) (% temp0))
|
|---|
| 122 | (pushq (% high))
|
|---|
| 123 | (pushq (% low))
|
|---|
| 124 | (set-nargs 2)
|
|---|
| 125 | (jmp-subprim .SPvalues)))
|
|---|
| 126 |
|
|---|
| 127 |
|
|---|
| 128 | (defx86lapfunction %multiply-and-add-fixnum-loop ((len64 8) #||(ra 0)||# (x arg_x) (y arg_y) (result arg_z))
|
|---|
| 129 | (let ((carry imm2)
|
|---|
| 130 | (i temp0)
|
|---|
| 131 | (rlen temp1))
|
|---|
| 132 | (movq (@ len64 (% rsp)) (% rlen))
|
|---|
| 133 | (xorl (%l carry) (%l carry))
|
|---|
| 134 | (xorl (%l i) (%l i))
|
|---|
| 135 | (jmp @test)
|
|---|
| 136 | @loop
|
|---|
| 137 | (unbox-fixnum y rax)
|
|---|
| 138 | (mulq (@ x8664::misc-data-offset (% x) (% i)))
|
|---|
| 139 | (addq (% carry) (% rax))
|
|---|
| 140 | (adcq ($ 0) (% rdx))
|
|---|
| 141 | (movq (% rdx) (% carry))
|
|---|
| 142 | (movq (% rax) (@ x8664::misc-data-offset (% result) (% i)))
|
|---|
| 143 | (addq ($ 8) (% i))
|
|---|
| 144 | @test
|
|---|
| 145 | (cmpq (% rlen) (% i))
|
|---|
| 146 | (jl @loop)
|
|---|
| 147 | (movq (% carry) (@ x8664::misc-data-offset (% result) (% i)))
|
|---|
| 148 | (single-value-return 3)))
|
|---|
| 149 |
|
|---|
| 150 |
|
|---|
| 151 |
|
|---|
| 152 |
|
|---|
| 153 |
|
|---|
| 154 |
|
|---|
| 155 | ;;; Set the ith 64-bit digit of BIGNUM to R
|
|---|
| 156 | (defx86lapfunction %set-digit ((bignum arg_x) (i arg_y) (r arg_z))
|
|---|
| 157 |
|
|---|
| 158 | (movq (@ x8664::misc-data-offset (% r)) (% imm0))
|
|---|
| 159 | (movq (% imm0) (@ x8664::misc-data-offset (% bignum) (% i)))
|
|---|
| 160 | (single-value-return))
|
|---|
| 161 |
|
|---|
| 162 | ;;; Return the (possibly truncated) 32-bit quotient and remainder
|
|---|
| 163 | ;;; resulting from dividing hi:low by divisor.
|
|---|
| 164 | (defx86lapfunction %floor ((num-high arg_x) (num-low arg_y) (divisor arg_z))
|
|---|
| 165 | (let ((unboxed-high imm1)
|
|---|
| 166 | (unboxed-low imm0)
|
|---|
| 167 | (unboxed-quo imm0)
|
|---|
| 168 | (unboxed-rem imm1)
|
|---|
| 169 | (unboxed-divisor imm2))
|
|---|
| 170 | (unbox-fixnum divisor unboxed-divisor)
|
|---|
| 171 | (unbox-fixnum num-high unboxed-high)
|
|---|
| 172 | (unbox-fixnum num-low unboxed-low)
|
|---|
| 173 | (divl (%l unboxed-divisor))
|
|---|
| 174 | (box-fixnum unboxed-quo arg_y)
|
|---|
| 175 | (box-fixnum unboxed-rem arg_z)
|
|---|
| 176 | (movq (% rsp) (% temp0))
|
|---|
| 177 | (pushq (% arg_y))
|
|---|
| 178 | (pushq (% arg_z))
|
|---|
| 179 | (set-nargs 2)
|
|---|
| 180 | (jmp-subprim .SPvalues)))
|
|---|
| 181 |
|
|---|
| 182 | ;;; Multiply two (UNSIGNED-BYTE 32) arguments, return the high and
|
|---|
| 183 | ;;; low halves of the 64-bit result
|
|---|
| 184 | (defx86lapfunction %multiply ((x arg_y) (y arg_z))
|
|---|
| 185 | (let ((unboxed-x imm0)
|
|---|
| 186 | (unboxed-y imm1)
|
|---|
| 187 | (unboxed-high imm1)
|
|---|
| 188 | (unboxed-low imm0))
|
|---|
| 189 | (unbox-fixnum x unboxed-x)
|
|---|
| 190 | (unbox-fixnum y unboxed-y)
|
|---|
| 191 | (mull (%l unboxed-y))
|
|---|
| 192 | (box-fixnum unboxed-high arg_y)
|
|---|
| 193 | (box-fixnum unboxed-low arg_z)
|
|---|
| 194 | (movq (% rsp) (% temp0))
|
|---|
| 195 | (pushq (% arg_y))
|
|---|
| 196 | (pushq (% arg_z))
|
|---|
| 197 | (set-nargs 2)
|
|---|
| 198 | (jmp-subprim .SPvalues)))
|
|---|
| 199 |
|
|---|
| 200 | ;;; Any words in the "tail" of the bignum should have been
|
|---|
| 201 | ;;; zeroed by the caller.
|
|---|
| 202 | (defx86lapfunction %set-bignum-length ((newlen arg_y) (bignum arg_z))
|
|---|
| 203 | (movq (% newlen) (% imm0))
|
|---|
| 204 | (shl ($ (- x8664::num-subtag-bits x8664::fixnumshift)) (% imm0))
|
|---|
| 205 | (movb ($ x8664::subtag-bignum) (%b imm0))
|
|---|
| 206 | (movq (% imm0) (@ x8664::misc-header-offset (% bignum)))
|
|---|
| 207 | (single-value-return))
|
|---|
| 208 |
|
|---|
| 209 | ;;; Count the sign bits in the most significant digit of bignum;
|
|---|
| 210 | ;;; return fixnum count.
|
|---|
| 211 | (defx86lapfunction %bignum-sign-bits ((bignum arg_z))
|
|---|
| 212 | (vector-size bignum imm0 imm0)
|
|---|
| 213 | (movl (@ (- x8664::misc-data-offset 4) (% bignum) (% imm0) 4) (%l imm0))
|
|---|
| 214 | (movl (% imm0.l) (% imm1.l))
|
|---|
| 215 | (notl (% imm0.l))
|
|---|
| 216 | (testl (% imm1.l) (% imm1.l))
|
|---|
| 217 | (js @wasneg)
|
|---|
| 218 | (notl (% imm0.l))
|
|---|
| 219 | @wasneg
|
|---|
| 220 | (bsrl (% imm0.l) (% imm0.l))
|
|---|
| 221 | (sete (% imm1.b))
|
|---|
| 222 | (xorl ($ 31) (% imm0))
|
|---|
| 223 | (addb (% imm1.b) (% imm0.b))
|
|---|
| 224 | (box-fixnum imm0 arg_z)
|
|---|
| 225 | (single-value-return))
|
|---|
| 226 |
|
|---|
| 227 | (defx86lapfunction %signed-bignum-ref ((bignum arg_y) (index arg_z))
|
|---|
| 228 | (uuo-error-debug-trap)
|
|---|
| 229 | (unbox-fixnum index imm0)
|
|---|
| 230 | (movslq (@ x8664::misc-data-offset (% bignum) (% imm0) 4) (% imm0))
|
|---|
| 231 | (box-fixnum imm0 arg_z)
|
|---|
| 232 | (single-value-return))
|
|---|
| 233 |
|
|---|
| 234 |
|
|---|
| 235 | ;;; If the bignum is a one-digit bignum, return the value of the
|
|---|
| 236 | ;;; single digit as a fixnum. Otherwise, if it's a two-digit-bignum
|
|---|
| 237 | ;;; and the two words of the bignum can be represented in a fixnum,
|
|---|
| 238 | ;;; return that fixnum; else return nil.
|
|---|
| 239 | (defx86lapfunction %maybe-fixnum-from-one-or-two-digit-bignum ((bignum arg_z))
|
|---|
| 240 | (getvheader bignum imm1)
|
|---|
| 241 | (cmpq ($ x8664::one-digit-bignum-header) (% imm1))
|
|---|
| 242 | (je @one)
|
|---|
| 243 | (cmpq ($ x8664::two-digit-bignum-header) (% imm1))
|
|---|
| 244 | (jne @no)
|
|---|
| 245 | (movq (@ x8664::misc-data-offset (% bignum)) (% imm0))
|
|---|
| 246 | (box-fixnum imm0 arg_z)
|
|---|
| 247 | (unbox-fixnum arg_z imm1)
|
|---|
| 248 | (cmpq (% imm0) (% imm1))
|
|---|
| 249 | (je @done)
|
|---|
| 250 | @no
|
|---|
| 251 | (movq ($ nil) (% arg_z))
|
|---|
| 252 | (single-value-return)
|
|---|
| 253 | @one
|
|---|
| 254 | (movslq (@ x8664::misc-data-offset (% bignum)) (% imm0))
|
|---|
| 255 | (box-fixnum imm0 arg_z)
|
|---|
| 256 | @done
|
|---|
| 257 | (single-value-return))
|
|---|
| 258 |
|
|---|
| 259 | ;;; Again, we're out of imm regs: a variable shift count has to go in %cl.
|
|---|
| 260 | ;;; Make sure that the rest of %rcx is 0, to keep the GC happy.
|
|---|
| 261 | ;;; %rcx == temp2
|
|---|
| 262 | (defx86lapfunction %digit-logical-shift-right ((digit arg_y) (count arg_z))
|
|---|
| 263 | (unbox-fixnum digit imm0)
|
|---|
| 264 | (unbox-fixnum count imm2)
|
|---|
| 265 | (shrq (% imm2.b) (% imm0))
|
|---|
| 266 | (box-fixnum imm0 arg_z)
|
|---|
| 267 | (single-value-return))
|
|---|
| 268 |
|
|---|
| 269 |
|
|---|
| 270 |
|
|---|
| 271 | (defx86lapfunction %ashr ((digit arg_y) (count arg_z))
|
|---|
| 272 | (unbox-fixnum digit imm0)
|
|---|
| 273 | (unbox-fixnum count imm2)
|
|---|
| 274 | (movslq (%l imm0) (% imm0))
|
|---|
| 275 | (sarq (% imm2.b) (% imm0))
|
|---|
| 276 | (box-fixnum imm0 arg_z)
|
|---|
| 277 | (single-value-return))
|
|---|
| 278 |
|
|---|
| 279 | (defx86lapfunction %ashl ((digit arg_y) (count arg_z))
|
|---|
| 280 | (unbox-fixnum digit imm0)
|
|---|
| 281 | (unbox-fixnum count imm2)
|
|---|
| 282 | (shlq (% imm2.b) (% imm0))
|
|---|
| 283 | (movl (%l imm0) (%l imm0)) ;zero-extend
|
|---|
| 284 | (box-fixnum imm0 arg_z)
|
|---|
| 285 | (single-value-return))
|
|---|
| 286 |
|
|---|
| 287 | (defx86lapfunction macptr->fixnum ((ptr arg_z))
|
|---|
| 288 | (macptr-ptr arg_z ptr)
|
|---|
| 289 | (single-value-return))
|
|---|
| 290 |
|
|---|
| 291 | (defx86lapfunction fix-digit-logand ((fix arg_x) (big arg_y) (dest arg_z)) ; index 0
|
|---|
| 292 | (let ((w1 imm0)
|
|---|
| 293 | (w2 imm1))
|
|---|
| 294 | (movq (@ x8664::misc-data-offset (% big)) (% w2))
|
|---|
| 295 | (unbox-fixnum fix w1)
|
|---|
| 296 | (andq (% w2) (% w1))
|
|---|
| 297 | (cmp-reg-to-nil dest)
|
|---|
| 298 | (jne @store)
|
|---|
| 299 | (box-fixnum w1 arg_z)
|
|---|
| 300 | (single-value-return)
|
|---|
| 301 | @store
|
|---|
| 302 | (movq (% w1) (@ x8664::misc-data-offset (% dest)))
|
|---|
| 303 | (single-value-return)))
|
|---|
| 304 |
|
|---|
| 305 | (defx86lapfunction fix-digit-logandc2 ((fix arg_x) (big arg_y) (dest arg_z))
|
|---|
| 306 | (let ((w1 imm0)
|
|---|
| 307 | (w2 imm1))
|
|---|
| 308 | (movq (@ x8664::misc-data-offset (% big)) (% w2))
|
|---|
| 309 | (unbox-fixnum fix w1)
|
|---|
| 310 | (notq (% w2))
|
|---|
| 311 | (andq (% w2) (% w1))
|
|---|
| 312 | (cmp-reg-to-nil dest)
|
|---|
| 313 | (jne @store)
|
|---|
| 314 | (box-fixnum w1 arg_z)
|
|---|
| 315 | (single-value-return)
|
|---|
| 316 | @store
|
|---|
| 317 | (movq (% w1) (@ x8664::misc-data-offset (% dest)))
|
|---|
| 318 | (single-value-return)))
|
|---|
| 319 |
|
|---|
| 320 |
|
|---|
| 321 | (defx86lapfunction fix-digit-logandc1 ((fix arg_x) (big arg_y) (dest arg_z))
|
|---|
| 322 | (let ((w1 imm0)
|
|---|
| 323 | (w2 imm1))
|
|---|
| 324 | (movq (@ x8664::misc-data-offset (% big)) (% w2))
|
|---|
| 325 | (unbox-fixnum fix w1)
|
|---|
| 326 | (notq (% w1))
|
|---|
| 327 | (andq (% w2) (% w1))
|
|---|
| 328 | (cmp-reg-to-nil dest)
|
|---|
| 329 | (jne @store)
|
|---|
| 330 | (box-fixnum w1 arg_z)
|
|---|
| 331 | (single-value-return)
|
|---|
| 332 | @store
|
|---|
| 333 | (movq (% w1) (@ x8664::misc-data-offset (% dest)))
|
|---|
| 334 | (single-value-return)))
|
|---|
| 335 |
|
|---|
| 336 | ;;; Do LOGIOR on the N 32-bit words in A and B, storing the result in
|
|---|
| 337 | ;;; C. (It's legal and desirable to do this more than 32 bits at a time.)
|
|---|
| 338 |
|
|---|
| 339 | (defx86lapfunction %bignum-logior ((n 8) #|ra 0|# (a arg_x) (b arg_y) (c arg_z))
|
|---|
| 340 | (movq (@ n (% rsp)) (% imm0))
|
|---|
| 341 | (shrq (% imm0))
|
|---|
| 342 | (testl ($ 4) (%l imm0))
|
|---|
| 343 | (je @check128)
|
|---|
| 344 | (subq ($ 4) (% imm0))
|
|---|
| 345 | (movl (@ x8664::misc-data-offset (% a) (% imm0)) (%l imm1))
|
|---|
| 346 | (orl (@ x8664::misc-data-offset (% b) (% imm0)) (%l imm1))
|
|---|
| 347 | (movl (%l imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
|
|---|
| 348 | (jmp @check128)
|
|---|
| 349 | @loop64
|
|---|
| 350 | (movq (@ x8664::misc-data-offset (% a) (% imm0)) (% imm1))
|
|---|
| 351 | (orq (@ x8664::misc-data-offset (% b) (% imm0)) (% imm1))
|
|---|
| 352 | (movq (% imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
|
|---|
| 353 | @test64
|
|---|
| 354 | (subq ($ 8) (% imm0))
|
|---|
| 355 | (jge @loop64)
|
|---|
| 356 | (single-value-return 3)
|
|---|
| 357 | ;; See if we can do some of this using the SSE2 hardware.
|
|---|
| 358 | ;; That's only possible if we have 6 or more words.
|
|---|
| 359 | @check128
|
|---|
| 360 | (rcmpq (% imm0) ($ (* 6 4)))
|
|---|
| 361 | (jl @test64)
|
|---|
| 362 | ;; We'll have to do the first 2 words in a 64-bit operation.
|
|---|
| 363 | ;; If the total number of words is a multiple of 4, we have
|
|---|
| 364 | ;; to do the last 2 words without using SSE2, as well.
|
|---|
| 365 | (testl ($ 8) (%l imm0))
|
|---|
| 366 | (jne @test128)
|
|---|
| 367 | (movq (@ (- x8664::misc-data-offset 8) (% a) (% imm0)) (% imm1))
|
|---|
| 368 | (orq (@ (- x8664::misc-data-offset 8) (% b) (% imm0)) (% imm1))
|
|---|
| 369 | (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c) (% imm0)))
|
|---|
| 370 | (subq ($ (+ 16 8)) (% imm0))
|
|---|
| 371 | @loop128
|
|---|
| 372 | (movaps (@ x8664::misc-data-offset (% a) (% imm0)) (% xmm0))
|
|---|
| 373 | (por (@ x8664::misc-data-offset (% b) (% imm0)) (% xmm0))
|
|---|
| 374 | (movaps (% xmm0) (@ x8664::misc-data-offset (% c) (% imm0)))
|
|---|
| 375 | @test128
|
|---|
| 376 | (subq ($ 16) (% imm0))
|
|---|
| 377 | (jg @loop128)
|
|---|
| 378 | (movq (@ x8664::misc-data-offset (% a)) (% imm1))
|
|---|
| 379 | (orq (@ x8664::misc-data-offset (% b)) (% imm1))
|
|---|
| 380 | (movq (% imm1) (@ x8664::misc-data-offset (% c)))
|
|---|
| 381 | (single-value-return 3))
|
|---|
| 382 |
|
|---|
| 383 |
|
|---|
| 384 |
|
|---|
| 385 | ;;; Do LOGAND on the N 32-bit words in A and B, storing the result in
|
|---|
| 386 | ;;; C. (It's legal and desirable to do this more than 32 bits at a time.)
|
|---|
| 387 |
|
|---|
| 388 | (defx86lapfunction %bignum-logand ((n 8) #|ra 0|# (a arg_x) (b arg_y) (c arg_z))
|
|---|
| 389 | (movq (@ n (% rsp)) (% imm0))
|
|---|
| 390 | (shrq (% imm0))
|
|---|
| 391 | (testl ($ 4) (%l imm0))
|
|---|
| 392 | (je @check128)
|
|---|
| 393 | (subq ($ 4) (% imm0))
|
|---|
| 394 | (movl (@ x8664::misc-data-offset (% a) (% imm0)) (%l imm1))
|
|---|
| 395 | (andl (@ x8664::misc-data-offset (% b) (% imm0)) (%l imm1))
|
|---|
| 396 | (movl (%l imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
|
|---|
| 397 | (jmp @check128)
|
|---|
| 398 | @loop64
|
|---|
| 399 | (movq (@ x8664::misc-data-offset (% a) (% imm0)) (% imm1))
|
|---|
| 400 | (andq (@ x8664::misc-data-offset (% b) (% imm0)) (% imm1))
|
|---|
| 401 | (movq (% imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
|
|---|
| 402 | @test64
|
|---|
| 403 | (subq ($ 8) (% imm0))
|
|---|
| 404 | (jge @loop64)
|
|---|
| 405 | (single-value-return 3)
|
|---|
| 406 | ;; See if we can do some of this using the SSE2 hardware.
|
|---|
| 407 | ;; That's only possible if we have 6 or more words.
|
|---|
| 408 | @check128
|
|---|
| 409 | (rcmpq (% imm0) ($ (* 6 4)))
|
|---|
| 410 | (jl @test64)
|
|---|
| 411 | ;; We'll have to do the first 2 words in a 64-bit operation.
|
|---|
| 412 | ;; If the total number of words is a multiple of 4, we have
|
|---|
| 413 | ;; to do the last 2 words without using SSE2, as well.
|
|---|
| 414 | (testl ($ 8) (%l imm0))
|
|---|
| 415 | (jne @test128)
|
|---|
| 416 | (movq (@ (- x8664::misc-data-offset 8) (% a) (% imm0)) (% imm1))
|
|---|
| 417 | (andq (@ (- x8664::misc-data-offset 8) (% b) (% imm0)) (% imm1))
|
|---|
| 418 | (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c) (% imm0)))
|
|---|
| 419 | (subq ($ (+ 16 8)) (% imm0))
|
|---|
| 420 | @loop128
|
|---|
| 421 | (movaps (@ x8664::misc-data-offset (% a) (% imm0)) (% xmm0))
|
|---|
| 422 | (pand (@ x8664::misc-data-offset (% b) (% imm0)) (% xmm0))
|
|---|
| 423 | (movaps (% xmm0) (@ x8664::misc-data-offset (% c) (% imm0)))
|
|---|
| 424 | @test128
|
|---|
| 425 | (subq ($ 16) (% imm0))
|
|---|
| 426 | (jg @loop128)
|
|---|
| 427 | (movq (@ x8664::misc-data-offset (% a)) (% imm1))
|
|---|
| 428 | (and (@ x8664::misc-data-offset (% b)) (% imm1))
|
|---|
| 429 | (movq (% imm1) (@ x8664::misc-data-offset (% c)))
|
|---|
| 430 | (single-value-return 3))
|
|---|
| 431 |
|
|---|