Changeset 13509


Ignore:
Timestamp:
Mar 9, 2010, 8:07:19 PM (10 years ago)
Author:
gz
Message:

From trunk: Faster logical operations on bignums (r13412 r13413 r13419 r13420 r13422 r13423 r13432)

Location:
branches/working-0711/ccl
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/working-0711/ccl

  • branches/working-0711/ccl/level-0/X86/X8664/x8664-bignum.lisp

    r13070 r13509  
    332332    (single-value-return)))
    333333
    334 
    335 
     334;;; Do LOGIOR on the N 32-bit words in A and B, storing the result in
     335;;; C.  (It's legal and desirable to do this more than 32 bits at a time.)
     336
     337(defx86lapfunction %bignum-logior ((n 8) #|ra 0|# (a arg_x) (b arg_y) (c arg_z))
     338  (movq (@ n (% rsp)) (% imm0))  ; imm0 = N, which is passed on the stack
     339  (shrq (% imm0))  ; halve it; imm0 is used as a byte offset from here on (presumably N is a tagged fixnum -- confirm)
     340  (testl ($ 4) (%l imm0))  ; is there an odd 32-bit word at the top?
     341  (je @check128)
     342  (subq ($ 4) (% imm0))  ; yes: handle that single word with 32-bit operations
     343  (movl (@ x8664::misc-data-offset (% a) (% imm0)) (%l imm1))
     344  (orl (@ x8664::misc-data-offset (% b) (% imm0)) (%l imm1))
     345  (movl (%l imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
     346  (jmp @check128)
     347  @loop64  ; 64 bits (two words) per iteration, working downward
     348  (movq (@ x8664::misc-data-offset (% a) (% imm0)) (% imm1))
     349  (orq (@ x8664::misc-data-offset (% b) (% imm0)) (% imm1))
     350  (movq (% imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
     351  @test64
     352  (subq ($  8) (% imm0))  ; step down 8 bytes; a negative offset means we are done
     353  (jge @loop64)
     354  (single-value-return 3)
     355  ;; See if we can do some of this using the SSE2 hardware.
     356  ;; That's only possible if we have 6 or more words.
     357  @check128
     358  (rcmpq (% imm0) ($ (* 6 4)))  ; compare the remaining byte count with 6 words' worth
     359  (jl @test64)  ; too short: use the 64-bit loop
     360  ;; We'll have to do the first 2 words in a 64-bit operation.
     361  ;; If the total number of words is a multiple of 4, we have
     362  ;; to do the last 2 words without using SSE2, as well.
     363  (testl ($ 8) (%l imm0))  ; bit 3 clear = byte count is a multiple of 16
     364  (jne @test128)
     365  (movq (@ (- x8664::misc-data-offset 8) (% a) (% imm0)) (% imm1))  ; last 2 words, at offset imm0-8
     366  (orq (@ (- x8664::misc-data-offset 8) (% b) (% imm0)) (% imm1))
     367  (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c) (% imm0)))
     368  (subq ($ (+ 16 8)) (% imm0))  ; drop those 8 bytes and point at the 16-byte chunk below them
     369  @loop128  ; 128 bits (four words) per iteration, working downward
     370  (movaps (@ x8664::misc-data-offset (% a) (% imm0)) (% xmm0))
     371  (por (@ x8664::misc-data-offset (% b) (% imm0)) (% xmm0))
     372  (movaps (% xmm0) (@ x8664::misc-data-offset (% c) (% imm0)))
     373  @test128
     374  (subq ($ 16) (% imm0))  ; step down 16 bytes
     375  (jg @loop128)
     376  (movq (@ x8664::misc-data-offset (% a)) (% imm1))  ; finally the first 2 words, at offset 0
     377  (orq (@ x8664::misc-data-offset (% b)) (% imm1))
     378  (movq (% imm1) (@ x8664::misc-data-offset (% c)))
     379  (single-value-return 3))
     380
     381
     382
     383;;; Do LOGAND on the N 32-bit words in A and B, storing the result in
     384;;; C.  (It's legal and desirable to do this more than 32 bits at a time.)
     385
     386(defx86lapfunction %bignum-logand ((n 8) #|ra 0|# (a arg_x) (b arg_y) (c arg_z))
     387  (movq (@ n (% rsp)) (% imm0))
     388  (shrq (% imm0))
     389  (testl ($ 4) (%l imm0))
     390  (je @check128)
     391  (subq ($ 4) (% imm0))
     392  (movl (@ x8664::misc-data-offset (% a) (% imm0)) (%l imm1))
     393  (andl (@ x8664::misc-data-offset (% b) (% imm0)) (%l imm1))
     394  (movl (%l imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
     395  (jmp @check128)
     396  @loop64
     397  (movq (@ x8664::misc-data-offset (% a) (% imm0)) (% imm1))
     398  (andq (@ x8664::misc-data-offset (% b) (% imm0)) (% imm1))
     399  (movq (% imm1) (@ x8664::misc-data-offset (% c) (% imm0)))
     400  @test64
     401  (subq ($  8) (% imm0))
     402  (jge @loop64)
     403  (single-value-return 3)
     404  ;; See if we can do some of this using the SSE2 hardware.
     405  ;; That's only possible if we have 6 or more words.
     406  @check128
     407  (rcmpq (% imm0) ($ (* 6 4)))
     408  (jl @test64)
     409  ;; We'll have to do the first 2 words in a 64-bit operation.
     410  ;; If the total number of words is a multiple of 4, we have
     411  ;; to do the last 2 words without using SSE2, as well.
     412  (testl ($ 8) (%l imm0))
     413  (jne @test128)
     414  (movq (@ (- x8664::misc-data-offset 8) (% a) (% imm0)) (% imm1))
     415  (andq (@ (- x8664::misc-data-offset 8) (% b) (% imm0)) (% imm1))
     416  (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c) (% imm0)))
     417  (subq ($ (+ 16 8)) (% imm0))
     418  @loop128
     419  (movaps (@ x8664::misc-data-offset (% a) (% imm0)) (% xmm0))
     420  (pand (@ x8664::misc-data-offset (% b) (% imm0)) (% xmm0))
     421  (movaps (% xmm0) (@ x8664::misc-data-offset (% c) (% imm0)))
     422  @test128
     423  (subq ($ 16) (% imm0))
     424  (jg @loop128)
     425  (movq (@ x8664::misc-data-offset (% a)) (% imm1))
     426  (and (@ x8664::misc-data-offset (% b)) (% imm1))
     427  (movq (% imm1) (@ x8664::misc-data-offset (% c)))
     428  (single-value-return 3))
     429
  • branches/working-0711/ccl/level-0/X86/x86-array.lisp

    r13070 r13509  
    231231
    232232
    233 
    234 (defx86lapfunction %boole-clr ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    235   (movq (@ idx (% rsp)) (% temp0))
    236   (movq ($ 0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    237   (single-value-return 3))
    238 
    239 (defx86lapfunction %boole-set ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    240   (movq (@ idx (% rsp)) (% temp0))
    241   (movq ($ -1) (@ x8664::misc-data-offset (% dest) (% temp0)))
    242   (single-value-return 3))
    243 
    244 (defx86lapfunction %boole-1 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    245   (movq (@ idx (% rsp)) (% temp0))
    246   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    247   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    248   (single-value-return 3))
    249 
    250 (defx86lapfunction %boole-2 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    251   (movq (@ idx (% rsp)) (% temp0))
    252   (movq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    253   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    254   (single-value-return 3))
    255 
    256 (defx86lapfunction %boole-c1 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    257   (movq (@ idx (% rsp)) (% temp0))
    258   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    259   (notq (% imm0))
    260   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    261   (single-value-return 3))
    262 
    263 (defx86lapfunction %boole-c2 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    264   (movq (@ idx (% rsp)) (% temp0))
    265   (movq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    266   (notq (% imm0))
    267   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    268   (single-value-return 3))
    269 
    270 (defx86lapfunction %boole-and ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    271   (movq (@ idx (% rsp)) (% temp0))
    272   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    273   (andq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    274   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    275   (single-value-return 3))
    276 
    277 (defx86lapfunction %boole-ior ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    278   (movq (@ idx (% rsp)) (% temp0))
    279   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    280   (orq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    281   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    282   (single-value-return 3))
    283 
    284 (defx86lapfunction %boole-xor ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    285   (movq (@ idx (% rsp)) (% temp0))
    286   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    287   (xorq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    288   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    289   (single-value-return 3))
    290 
    291 (defx86lapfunction %boole-eqv ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    292   (movq (@ idx (% rsp)) (% temp0))
    293   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    294   (xorq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    295   (notq (% imm0))
    296   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    297   (single-value-return 3))
    298 
    299 (defx86lapfunction %boole-nand ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    300   (movq (@ idx (% rsp)) (% temp0))
    301   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    302   (andq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    303   (notq (% imm0))
    304   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    305   (single-value-return 3))
    306 
    307 (defx86lapfunction %boole-nor ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    308   (movq (@ idx (% rsp)) (% temp0))
    309   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    310   (orq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    311   (notq (% imm0))
    312   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    313   (single-value-return 3))
    314 
    315 (defx86lapfunction %boole-andc1 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    316   (movq (@ idx (% rsp)) (% temp0))
    317   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    318   (notq (% imm0))
    319   (andq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    320   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    321   (single-value-return 3))
    322 
    323 (defx86lapfunction %boole-andc2 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    324   (movq (@ idx (% rsp)) (% temp0))
    325   (movq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    326   (notq (% imm0))
    327   (andq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    328   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    329   (single-value-return 3))
    330 
    331 (defx86lapfunction %boole-orc1 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    332   (movq (@ idx (% rsp)) (% temp0))
    333   (movq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    334   (notq (% imm0))
    335   (orq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    336   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
    337   (single-value-return 3))
    338 
    339 (defx86lapfunction %boole-orc2 ((idx 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
    340   (movq (@ idx (% rsp)) (% temp0))
    341   (movq (@ x8664::misc-data-offset (% b1) (% temp0)) (% imm0))
    342   (notq (% imm0))
    343   (orq (@ x8664::misc-data-offset (% b0) (% temp0)) (% imm0))
    344   (movq (% imm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     233;;; In each of these "simple BOOLE functions", the LEN argument
     234;;; describes the size of the bit vectors in whole or partial
     235;;; native-sized words.  The 0th word (and, if the number of
     236;;; words is even, the last word) are not aligned on 16-byte
     237;;; boundaries; any intervening pairs of words are aligned on
     238;;; 16-byte boundaries, and we can use aligned SSE2 instructions
     239;;; to process these bits 128 at a time.
     240
     241(defx86lapfunction %boole-clr ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
     242  (movq (@ len (% rsp)) (% temp0))
     243  (orl ($ 1) (%l temp0))
     244  (pxor (% xmm0) (% xmm0))
     245  (jmp @test)
     246  @loop
     247  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     248  @test
     249  (subq ($ '2) (% temp0))
     250  (jg @loop)
     251  (movq ($ 0) (@ x8664::misc-data-offset (% dest)))
     252  (single-value-return 3))
     253
     254(defx86lapfunction %boole-set ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := all one bits; B0 and B1 are not read
     255  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     256  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     257  (pcmpeqb (% xmm0) (% xmm0))  ; comparing a register with itself sets every bit: xmm0 = 128 one bits
     258  (jmp @test)
     259  @loop
     260  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))  ; fill one 16-byte-aligned pair of words
     261  @test
     262  (subq ($ '2) (% temp0))  ; step down two words
     263  (jg @loop)
     264  (movq ($ -1) (@ x8664::misc-data-offset (% dest)))  ; word 0 is not 16-byte aligned, so fill it with a 64-bit store
     265  (single-value-return 3)
     266  (:align 4))  ; NOTE(review): the only BOOLE function here that ends with an :align directive -- confirm it is intentional
     267
     268(defx86lapfunction %boole-1 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := B0; B1 is not read
     269  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     270  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     271  (jmp @test)
     272  @loop
     273  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; copy one 16-byte-aligned pair of words
     274  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     275  @test
     276  (subq ($ '2) (% temp0))  ; step down two words
     277  (jg @loop)
     278  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so copy it with 64-bit moves
     279  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     280  (single-value-return 3))
     281
     282(defx86lapfunction %boole-2 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := B1; B0 is not read
     283  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     284  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     285  (jmp @test)
     286  @loop
     287  (movaps (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))  ; copy one 16-byte-aligned pair of words
     288  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     289  @test
     290  (subq ($ '2) (% temp0))  ; step down two words
     291  (jg @loop)
     292  (movq (@ x8664::misc-data-offset (% b1)) (% imm0))  ; word 0 is not 16-byte aligned, so copy it with 64-bit moves
     293  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     294  (single-value-return 3))
     295
     296(defx86lapfunction %boole-c1 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := complement of B0; B1 is not read
     297  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     298  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     299  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     300  (jmp @test)
     301  @loop
     302  (movdqa (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; aligned 128-bit load (the sibling functions use movaps here)
     303  (pxor (% xmm1) (% xmm0))  ; complement the pair
     304  (movdqa (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     305  @test
     306  (subq ($ '2) (% temp0))  ; step down two words
     307  (jg @loop)
     308  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     309  (notq (% imm0))
     310  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     311  (single-value-return 3))
     312
     313(defx86lapfunction %boole-c2 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := complement of B1; B0 is not read
     314  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     315  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     316  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     317  (jmp @test)
     318  @loop
     319  (movaps (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     320  (pxor (% xmm1) (% xmm0))  ; complement the pair
     321  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     322  @test
     323  (subq ($ '2) (% temp0))  ; step down two words
     324  (jg @loop)
     325  (movq (@ x8664::misc-data-offset (% b1)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     326  (notq (% imm0))
     327  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     328  (single-value-return 3))
     329
     330(defx86lapfunction %boole-and ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := B0 AND B1
     331  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     332  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     333  (jmp @test)
     334  @loop
     335  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     336  (pand (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     337  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     338  @test
     339  (subq ($ '2) (% temp0))  ; step down two words
     340  (jg @loop)
     341  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     342  (andq (@ x8664::misc-data-offset (% b1)) (% imm0))
     343  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     344  (single-value-return 3))
     345
     346(defx86lapfunction %boole-ior ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := B0 OR B1
     347  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     348  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     349  (jmp @test)
     350  @loop
     351  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     352  (por (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     353  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     354  @test
     355  (subq ($ '2) (% temp0))  ; step down two words
     356  (jg @loop)
     357  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     358  (orq (@ x8664::misc-data-offset (% b1)) (% imm0))
     359  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     360  (single-value-return 3))
     361
     362(defx86lapfunction %boole-xor ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := B0 XOR B1
     363  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     364  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     365  (jmp @test)
     366  @loop
     367  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     368  (pxor (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     369  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     370  @test
     371  (subq ($ '2) (% temp0))  ; step down two words
     372  (jg @loop)
     373  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     374  (xorq (@ x8664::misc-data-offset (% b1)) (% imm0))
     375  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     376  (single-value-return 3))
     377
     378(defx86lapfunction %boole-eqv ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := complement of (B0 XOR B1)
     379  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     380  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     381  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     382  (jmp @test)
     383  @loop
     384  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     385  (pxor (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     386  (pxor (% xmm1) (% xmm0))  ; complement the XOR
     387  (movaps(% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     388  @test
     389  (subq ($ '2) (% temp0))  ; step down two words
     390  (jg @loop)  ; loop while aligned pairs remain
     391  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     392  (xorq (@ x8664::misc-data-offset (% b1)) (% imm0))
     393  (notq (% imm0))
     394  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     395  (single-value-return 3))
     396
     397(defx86lapfunction %boole-nand ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := complement of (B0 AND B1)
     398  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     399  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     400  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     401  (jmp @test)
     402  @loop
     403  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     404  (pand (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     405  (pxor (% xmm1) (% xmm0))  ; complement the AND
     406  (movaps(% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     407  @test
     408  (subq ($ '2) (% temp0))  ; step down two words
     409  (jg @loop)  ; loop while aligned pairs remain
     410  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     411  (andq (@ x8664::misc-data-offset (% b1)) (% imm0))
     412  (notq (% imm0))
     413  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     414  (single-value-return 3))
     415
     416(defx86lapfunction %boole-nor ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := complement of (B0 OR B1)
     417  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     418  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     419  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     420  (jmp @test)
     421  @loop
     422  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words
     423  (por (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     424  (pxor (% xmm1) (% xmm0))  ; complement the OR
     425  (movaps(% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     426  @test
     427  (subq ($ '2) (% temp0))  ; step down two words
     428  (jg @loop)  ; loop while aligned pairs remain
     429  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     430  (orq (@ x8664::misc-data-offset (% b1)) (% imm0))
     431  (notq (% imm0))
     432  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     433  (single-value-return 3))
     434
     435(defx86lapfunction %boole-andc1 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := (complement of B0) AND B1
     436  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     437  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     438  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     439  (jmp @test)
     440  @loop
     441  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words from B0
     442  (pxor (% xmm1) (% xmm0))  ; complement it
     443  (pand (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     444  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     445  @test
     446  (subq ($ '2) (% temp0))  ; step down two words
     447  (jg @loop)  ; loop while aligned pairs remain
     448  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     449  (notq (% imm0))
     450  (andq (@ x8664::misc-data-offset (% b1)) (% imm0))
     451  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     452  (single-value-return 3))
     453
     454(defx86lapfunction %boole-andc2 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := B0 AND (complement of B1)
     455  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     456  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     457  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     458  (jmp @test)
     459  @loop
     460  (movaps (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words from B1
     461  (pxor (% xmm1) (% xmm0))  ; complement it
     462  (pand (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))
     463  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     464  @test
     465  (subq ($ '2) (% temp0))  ; step down two words
     466  (jg @loop)  ; loop while aligned pairs remain
     467  (movq (@ x8664::misc-data-offset (% b1)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     468  (notq (% imm0))
     469  (andq (@ x8664::misc-data-offset (% b0)) (% imm0))
     470  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     471  (single-value-return 3))
     472
     473(defx86lapfunction %boole-orc1 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))  ; DEST := (complement of B0) OR B1
     474  (movq (@ len (% rsp)) (% temp0))  ; temp0 = LEN, which is passed on the stack
     475  (orl ($ '1) (%l temp0))  ; force an odd word count: word 0 plus whole pairs (assumes '1 is the tagged fixnum 1 -- confirm)
     476  (pcmpeqb (% xmm1) (% xmm1))  ; xmm1 = 128 one bits; XORing with it complements a value
     477  (jmp @test)
     478  @loop
     479  (movaps (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))  ; load one 16-byte-aligned pair of words from B0
     480  (pxor (% xmm1) (% xmm0))  ; complement it
     481  (por (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     482  (movaps (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     483  @test
     484  (subq ($ '2) (% temp0))  ; step down two words
     485  (jg @loop)  ; loop while aligned pairs remain
     486  (movq (@ x8664::misc-data-offset (% b0)) (% imm0))  ; word 0 is not 16-byte aligned, so handle it with 64-bit operations
     487  (notq (% imm0))
     488  (orq (@ x8664::misc-data-offset (% b1)) (% imm0))
     489  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
     490  (single-value-return 3))
     491
     492(defx86lapfunction %boole-orc2 ((len 8) #|(ra 0)|# (b0 arg_x) (b1 arg_y) (dest arg_z))
     493  (movq (@ len (% rsp)) (% temp0))
     494  (orl ($ '1) (%l temp0))
     495  (pcmpeqb (% xmm1) (% xmm1))
     496  (jmp @test)
     497  @loop
     498  (movaps (@ x8664::misc-data-offset (% b1) (% temp0)) (% xmm0))
     499  (pxor (% xmm1) (% xmm0))
     500  (por (@ x8664::misc-data-offset (% b0) (% temp0)) (% xmm0))
     501  (movq (% xmm0) (@ x8664::misc-data-offset (% dest) (% temp0)))
     502  @test
     503  (subq ($ '2) (% temp0))
     504  (jg @loop) 
     505  (movq (@ x8664::misc-data-offset (% b1)) (% imm0))
     506  (notq (% imm0))
     507  (orq (@ x8664::misc-data-offset (% b0)) (% imm0))
     508  (movq (% imm0) (@ x8664::misc-data-offset (% dest)))
    345509  (single-value-return 3))
    346510
     
    367531
    368532(defun %simple-bit-boole (op b1 b2 result)
    369   (let* ((f (svref *simple-bit-boole-functions* op)))
    370     (dotimes (i (ash (the fixnum (+ (length result) 63)) -6) result)
    371       (funcall f i b1 b2 result))))
     533  (funcall (svref *simple-bit-boole-functions* op)
     534           (ash (the fixnum (+ (length result) 63)) -6)
     535           b1
     536           b2
     537           result))
    372538
    373539(defx86lapfunction %aref2 ((array arg_x) (i arg_y) (j arg_z))
  • branches/working-0711/ccl/level-0/l0-bignum64.lisp

    r13339 r13509  
    12631263  (let* ((len-a (%bignum-length a))
    12641264         (len-b (%bignum-length b))
    1265          (a-plusp (bignum-plusp a))
    1266          (b-plusp (bignum-plusp b)))
    1267     (declare (type bignum-index len-a len-b))
    1268     (cond
    1269       ((< len-a len-b)
    1270        (if a-plusp
    1271          (logand-shorter-positive a len-a b (%allocate-bignum len-a))
    1272          (logand-shorter-negative a len-a b len-b (%allocate-bignum len-b))))
    1273       ((< len-b len-a)
    1274        (if b-plusp
    1275          (logand-shorter-positive b len-b a (%allocate-bignum len-b))
    1276          (logand-shorter-negative b len-b a len-a (%allocate-bignum len-a))))
    1277       (t (logand-shorter-positive a len-a b (%allocate-bignum len-a))))))
    1278 
    1279 ;;; LOGAND-SHORTER-POSITIVE -- Internal.
    1280 ;;;
    1281 ;;; This takes a shorter bignum, a and len-a, that is positive.  Because this
    1282 ;;; is AND, we don't care about any bits longer than a's since its infinite 0
    1283 ;;; sign bits will mask the other bits out of b.  The result is len-a big.
    1284 ;;;
    1285 (defun logand-shorter-positive (a len-a b res)
    1286   (declare (type bignum-type a b res)
    1287            (type bignum-index len-a))
    1288   (dotimes (i len-a)
    1289     (setf (bignum-ref res i)
    1290           (logand (the fixnum (bignum-ref a i))
    1291                   (the fixnum (bignum-ref b i)))))
    1292   (%normalize-bignum-macro res))
    1293 
    1294 ;;; LOGAND-SHORTER-NEGATIVE -- Internal.
    1295 ;;;
    1296 ;;; This takes a shorter bignum, a and len-a, that is negative.  Because this
    1297 ;;; is AND, we just copy any bits longer than a's since its infinite 1 sign
    1298 ;;; bits will include any bits from b.  The result is len-b big.
    1299 ;;;
    1300 (defun logand-shorter-negative (a len-a b len-b res)
    1301   (declare (type bignum-type a b res)
    1302            (type bignum-index len-a len-b))
    1303   (dotimes (i len-a)
    1304     (setf (bignum-ref res i)
    1305           (logand (the fixnum (bignum-ref a i))
    1306                               (the fixnum (bignum-ref b i)))))
    1307   (bignum-replace res b :start1 len-a :start2 len-a :end1 len-b :end2 len-b)
    1308   (%normalize-bignum-macro res))
    1309 
     1265         (shorter a)
     1266         (longer b)
     1267         (shorter-len len-a)
     1268         (longer-len len-b)
     1269         (shorter-positive (bignum-plusp a)))
     1270    (declare (type bignum-index len-a len-b shorter-len longer-len))
     1271    (when (< len-b len-a)
     1272      (setq shorter b
     1273            longer a
     1274            shorter-len len-b
     1275            longer-len len-a
     1276            shorter-positive (bignum-plusp b)))
     1277    (let* ((result (%allocate-bignum longer-len)))
     1278      (%bignum-logand shorter-len shorter longer result)
     1279      (unless shorter-positive
     1280        (bignum-replace result longer :start1 shorter-len :start2 shorter-len :end1 longer-len :end2 longer-len))
     1281      (%normalize-bignum-macro result))))
    13101282
    13111283
     
    14081380  (let* ((len-a (%bignum-length a))
    14091381         (len-b (%bignum-length b))
    1410          (a-plusp (bignum-plusp a))
    1411          (b-plusp (bignum-plusp b)))
    1412     (declare (type bignum-index len-a len-b))
    1413     (cond
    1414      ((< len-a len-b)
    1415       (if a-plusp
    1416           (logior-shorter-positive a len-a b len-b (%allocate-bignum len-b))
    1417           (logior-shorter-negative a len-a b len-b (%allocate-bignum len-b))))
    1418      ((< len-b len-a)
    1419       (if b-plusp
    1420           (logior-shorter-positive b len-b a len-a (%allocate-bignum len-a))
    1421           (logior-shorter-negative b len-b a len-a (%allocate-bignum len-a))))
    1422      (t (logior-shorter-positive a len-a b len-b (%allocate-bignum len-a))))))
    1423 
    1424 ;;; LOGIOR-SHORTER-POSITIVE -- Internal.
    1425 ;;;
    1426 ;;; This takes a shorter bignum, a and len-a, that is positive.  Because this
    1427 ;;; is IOR, we don't care about any bits longer than a's since its infinite
    1428 ;;; 0 sign bits will mask the other bits out of b out to len-b.  The result
    1429 ;;; is len-b long.
    1430 ;;;
    1431 (defun logior-shorter-positive (a len-a b len-b res)
    1432   (declare (type bignum-type a b res)
    1433            (type bignum-index len-a len-b))
    1434   (dotimes (i len-a)
    1435     (setf (bignum-ref res i)
    1436           (logior (the fixnum (bignum-ref a i))
    1437                   (the fixnum (bignum-ref b i)))))
    1438   (if (not (eql len-a len-b))
    1439     (bignum-replace res b :start1 len-a :start2 len-a :end1 len-b :end2 len-b))
    1440   (%normalize-bignum-macro res))
    1441 
    1442 ;;; LOGIOR-SHORTER-NEGATIVE -- Internal.
    1443 ;;;
    1444 ;;; This takes a shorter bignum, a and len-a, that is negative.  Because this
    1445 ;;; is IOR, we just copy any bits longer than a's since its infinite 1 sign
    1446 ;;; bits will include any bits from b.  The result is len-b long.
    1447 ;;;
    1448 (defun logior-shorter-negative (a len-a b len-b res)
    1449   (declare (type bignum-type a b res)
    1450            (type bignum-index len-a len-b))
    1451   (dotimes (i len-a)
    1452     (setf (bignum-ref res i)
    1453           (logior (the fixnum (bignum-ref a i))
    1454                   (the fixnum (bignum-ref b i)))))
    1455   (do ((i len-a (1+ i)))
    1456       ((= i len-b))
    1457     (declare (type bignum-index i))
    1458     (setf (bignum-ref res i) #xffffffff))
    1459   (%normalize-bignum-macro res))
    1460 
     1382         (longer-len len-b)
     1383         (shorter-len len-a)
     1384         (shorter a)
     1385         (longer b)
     1386         (shorter-positive (bignum-plusp a)))
     1387    (declare (type bignum-index len-a len-b longer-len shorter-len))
     1388    (when (< len-b len-a)
     1389      (setq shorter b
     1390            longer a
     1391            shorter-len len-b
     1392            longer-len len-a
     1393            shorter-positive (bignum-plusp b)))
     1394    (let* ((result (%allocate-bignum longer-len)))
     1395      (%bignum-logior shorter-len shorter longer result)
     1396      (unless (= shorter-len longer-len)
     1397        (if shorter-positive
     1398          (bignum-replace result longer :start1 shorter-len :start2 shorter-len :end1 longer-len :end2 longer-len)
     1399          (do* ((i shorter-len (1+ i)))
     1400               ((= i longer-len))
     1401            (declare (type bignum-index i))
     1402            (setf (bignum-ref result i) #xffffffff))))
     1403      (%normalize-bignum-macro result))))
    14611404
    14621405
Note: See TracChangeset for help on using the changeset viewer.