Changeset 15036


Ignore:
Timestamp:
Oct 24, 2011, 10:57:57 AM (8 years ago)
Author:
gb
Message:

Re-do %COPY-IVECTOR-TO-IVECTOR for x8664. Handle overlap correctly (old
version was too wimpy), copy aligned chunks 32/64/128 bits at a time.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/source/level-0/X86/x86-misc.lisp

    r14710 r15036  
    6363                                         (nbytes arg_z))
    6464  (let ((rsrc temp0)
    65         (rsrc-byte-offset temp1))
     65        (rsrc-byte-offset imm0)
     66        (rdest-byte-offset imm1)
     67        (rdata imm2))
     68    (movq (@ src-byte-offset (% rsp)) (% rsrc-byte-offset))
     69    (sarq ($ x8664::word-shift) (% rsrc-byte-offset))
     70    (movq (% dest-byte-offset) (% rdest-byte-offset))
     71    (sarq ($ x8664::word-shift) (% rdest-byte-offset))
     72    (movq (@ src (% rsp)) (% rsrc))
    6673    (testq (% nbytes) (% nbytes))
    67     (movq (@ src-byte-offset (% rsp)) (% rsrc-byte-offset))
    68     (movq (@ src (% rsp)) (% rsrc))
    6974    (jmp @test)
    7075    @loop
     
    ;;; %COPY-IVECTOR-TO-IVECTOR: copy NBYTES bytes from SRC (starting at
    ;;; SRC-BYTE-OFFSET) to DEST (starting at DEST-BYTE-OFFSET); the new
    ;;; body's final form returns DEST.
    ;;; NOTE(review): this is a diff rendering.  Lines that carry only an
    ;;; old-revision number (87-98) appear to be the removed r14710 body and
    ;;; the removed %COPY-IVECTOR-TO-IVECTOR-WORDS header; lines numbered
    ;;; 92-104 in the right-hand column are the r15036 body.
    8590(defun %copy-ivector-to-ivector (src src-byte-offset dest dest-byte-offset nbytes)
    8691  (declare (fixnum src-byte-offset dest-byte-offset nbytes))
    87   (if (or (eq src dest)
    88           (not (eql 0 src-byte-offset))
    89           (not (eql 0 dest-byte-offset))
    90           (< nbytes 8))
    91     (%copy-ivector-to-ivector-bytes src src-byte-offset dest dest-byte-offset nbytes)
    92     (%copy-ivector-to-ivector-words src dest (ash nbytes -3) (logand nbytes 7))))
    93 
    94 (defx86lapfunction %copy-ivector-to-ivector-words ((src 8)
    95                                                    #|(ra 0)|#
    96                                                    (dest arg_x)
    97                                                    (nwords arg_y)
    98                                                    (nbytes arg_z))
     ;; New body: copy in ascending order (postincrement) when SRC and DEST
     ;; are different objects, or when the destination starts below the
     ;; source start or at/after the source end.  Otherwise (same vector,
     ;; destination start inside the source range): identical offsets need
     ;; no copy at all; anything else is copied in descending order
     ;; (predecrement), which is handed the END offsets (offset + nbytes).
     92  (if (or (not (eq src dest))
     93          (< dest-byte-offset src-byte-offset)
     94          (>= dest-byte-offset (the fixnum (+ src-byte-offset nbytes))))
     95    (%copy-ivector-to-ivector-postincrement src src-byte-offset dest dest-byte-offset nbytes)
     96    (if (and (eq src dest)
     97             (eql src-byte-offset dest-byte-offset))
     98      dest
     99      (%copy-ivector-to-ivector-predecrement src
     100                                             (the fixnum (+ src-byte-offset nbytes))
     101                                             dest
     102                                             (the fixnum (+ dest-byte-offset nbytes))
     103                                             nbytes)))
     104  dest)
     105
     ;;; Ascending-order copy of NBYTES bytes from SRC+SRC-BYTE-OFFSET to
     ;;; DEST+DEST-BYTE-OFFSET.  Picks the widest helper that the size and the
     ;;; relative alignment of the two offsets allow:
     ;;;   - fewer than 8 bytes, or offsets that differ modulo 4: bytes only;
     ;;;   - at least 80 bytes and offsets equal modulo 16: 128-bit chunks;
     ;;;   - offsets equal modulo 8: 64-bit chunks;
     ;;;   - otherwise (offsets equal modulo 4): 32-bit chunks.
     ;;; In the wide cases a byte-wise prefix is copied first to reach the
     ;;; chunk alignment, and a byte-wise tail finishes whatever is left over.
     106(defun %copy-ivector-to-ivector-postincrement (src src-byte-offset dest dest-byte-offset nbytes)
     107  (declare (fixnum src-byte-offset dest-byte-offset nbytes))
     108 
     109  (cond ((or (< nbytes 8)
     110             (not (= (logand src-byte-offset 3)
     111                     (logand dest-byte-offset 3))))
     112         (%copy-ivector-to-ivector-postincrement-8bit src src-byte-offset dest dest-byte-offset nbytes))
     113        ((and (>= nbytes 80)
     114              (= (logand src-byte-offset 15)
     115                 (logand dest-byte-offset 15)))
     ;; The prefix is sized so that (offset + 8) becomes a multiple of 16.
     ;; NOTE(review): the +8 presumably reflects vector data starting 8 bytes
     ;; past a 16-byte boundary -- confirm against the x8664 object layout.
     116         (let* ((prefix-size (- 16 (logand (the fixnum (+ src-byte-offset 8)) 15))))
     117           (declare (fixnum prefix-size))
     118           (unless (= 16 prefix-size)
     119             (%copy-ivector-to-ivector-postincrement-8bit src src-byte-offset dest dest-byte-offset prefix-size)
     120             (incf src-byte-offset prefix-size)
     121             (incf dest-byte-offset prefix-size)
     122             (decf nbytes prefix-size)))
     ;; At least 65 bytes remain here (80 minus a prefix of at most 15), so
     ;; DQSIZE is non-zero and the 128-bit helper is called unconditionally.
     123         (let* ((tail-size (logand nbytes 15))
     124                (dqsize (- nbytes tail-size)))
     125           (declare (fixnum tail-size dqsize))
     126           (%copy-ivector-to-ivector-postincrement-128bit src src-byte-offset dest dest-byte-offset dqsize)
     127           (unless (zerop tail-size)
     128             (%copy-ivector-to-ivector-postincrement-8bit src (the fixnum (+ src-byte-offset dqsize)) dest (the fixnum (+ dest-byte-offset dqsize)) tail-size))))
     129        ((= (logand src-byte-offset 7) (logand dest-byte-offset 7))
     ;; Byte-copy up to the next multiple of 8, then 64-bit words, then tail.
     130         (let* ((prefix-size (- 8 (logand src-byte-offset 7))))
     131           (declare (fixnum prefix-size))
     132           (unless (= 8 prefix-size)
     133             (%copy-ivector-to-ivector-postincrement-8bit src src-byte-offset dest dest-byte-offset prefix-size)
     134             (incf src-byte-offset prefix-size)
     135             (incf dest-byte-offset prefix-size)
     136             (decf nbytes prefix-size)))
     137         (let* ((tail-size (logand nbytes 7))
     138                (fullword-size (- nbytes tail-size)))
     139           (declare (fixnum tail-size fullword-size))
     140           (unless (zerop fullword-size)
     141             (%copy-ivector-to-ivector-postincrement-64bit src src-byte-offset dest dest-byte-offset fullword-size))
     142           (unless (zerop tail-size)
     143             (%copy-ivector-to-ivector-postincrement-8bit src (the fixnum (+ src-byte-offset fullword-size)) dest (the fixnum (+ dest-byte-offset fullword-size)) tail-size))))
     144        (t
     ;; Offsets agree modulo 4 only: byte prefix, 32-bit words, byte tail.
     145         (let* ((prefix-size (- 4 (logand src-byte-offset 3))))
     146           (declare (fixnum prefix-size))
     147           (unless (= 4 prefix-size)
     148             (%copy-ivector-to-ivector-postincrement-8bit src src-byte-offset dest dest-byte-offset prefix-size)
     149             (incf src-byte-offset prefix-size)
     150             (incf dest-byte-offset prefix-size)
     151             (decf nbytes prefix-size)))
     152         (let* ((tail-size (logand nbytes 3))
     153                (fullword-size (- nbytes tail-size)))
     154           (declare (fixnum tail-size fullword-size))
     155           (unless (zerop fullword-size)
     156             (%copy-ivector-to-ivector-postincrement-32bit src src-byte-offset dest dest-byte-offset fullword-size))
     157           (unless (zerop tail-size)
     158             (%copy-ivector-to-ivector-postincrement-8bit src (the fixnum (+ src-byte-offset fullword-size)) dest (the fixnum (+ dest-byte-offset fullword-size)) tail-size))))))
     159
     160(defun %copy-ivector-to-ivector-predecrement (src src-byte-offset dest dest-byte-offset nbytes)
     161  (declare (fixnum src-byte-offset dest-byte-offset nbytes))
     162  (cond ((or (< nbytes 8)
     163             (not (= (logand src-byte-offset 3)
     164                     (logand dest-byte-offset 3))))
     165         (%copy-ivector-to-ivector-predecrement-8bit src src-byte-offset dest dest-byte-offset nbytes))
     166        ((and (>= nbytes 80)
     167              (= (logand src-byte-offset 15)
     168                 (logand dest-byte-offset 15)))
     169      (let* ((suffix-size (logand src-byte-offset 15)))
     170        (declare (fixnum suffix-size))
     171        (unless (zerop suffix-size)
     172          (%copy-ivector-to-ivector-predecrement-8bit src src-byte-offset dest dest-byte-offset suffix-size)
     173          (decf src-byte-offset suffix-size)
     174          (decf dest-byte-offset suffix-size)
     175          (decf nbytes suffix-size)))
     176      (let* ((head-size (logand nbytes 15))
     177             (fullword-size (- nbytes head-size)))
     178        (declare (fixnum head-size fullword-size))
     179        (unless (zerop fullword-size)
     180          (%copy-ivector-to-ivector-predecrement-128bit src src-byte-offset dest dest-byte-offset fullword-size))
     181        (unless (zerop head-size)
     182          (%copy-ivector-to-ivector-predecrement-8bit src (the fixnum (- src-byte-offset fullword-size)) dest (the fixnum (- dest-byte-offset fullword-size)) head-size))))
     183    ((= (logand src-byte-offset 7) (logand dest-byte-offset 7))
     184      (let* ((suffix-size (logand src-byte-offset 7)))
     185        (declare (fixnum suffix-size))
     186        (unless (zerop suffix-size)
     187          (%copy-ivector-to-ivector-predecrement-8bit src src-byte-offset dest dest-byte-offset suffix-size)
     188          (decf src-byte-offset suffix-size)
     189          (decf dest-byte-offset suffix-size)
     190          (decf nbytes suffix-size)))
     191      (let* ((head-size (logand nbytes 7))
     192             (fullword-size (- nbytes head-size)))
     193        (declare (fixnum head-size fullword-size))
     194        (unless (zerop fullword-size)
     195          (%copy-ivector-to-ivector-predecrement-64bit src src-byte-offset dest dest-byte-offset fullword-size))
     196        (unless (zerop head-size)
     197          (%copy-ivector-to-ivector-predecrement-8bit src (the fixnum (- src-byte-offset fullword-size)) dest (the fixnum (- dest-byte-offset fullword-size)) head-size))))
     198    (t
     199      (let* ((suffix-size (logand src-byte-offset 3)))
     200        (declare (fixnum suffix-size))
     201        (unless (zerop suffix-size)
     202          (%copy-ivector-to-ivector-predecrement-8bit src src-byte-offset dest dest-byte-offset suffix-size)
     203          (decf src-byte-offset suffix-size)
     204          (decf dest-byte-offset suffix-size)
     205          (decf nbytes suffix-size)))
     206      (let* ((head-size (logand nbytes 3))
     207             (fullword-size (- nbytes head-size)))
     208        (declare (fixnum head-size fullword-size))
     209        (unless (zerop fullword-size)
     210          (%copy-ivector-to-ivector-predecrement-32bit src src-byte-offset dest dest-byte-offset fullword-size))
     211        (unless (zerop head-size)
     212          (%copy-ivector-to-ivector-predecrement-8bit src (the fixnum (- src-byte-offset fullword-size)) dest (the fixnum (- dest-byte-offset fullword-size)) head-size))))))
     213
     ;;; %COPY-IVECTOR-TO-IVECTOR-POSTINCREMENT-8BIT: byte-at-a-time ascending
     ;;; copy.  SRC and SRC-BYTE-OFFSET are read from the stack (rsp-relative);
     ;;; DEST, DEST-BYTE-OFFSET and NBYTES arrive in arg_x, arg_y and arg_z.
     ;;; New-revision flow: both offsets are copied into imm registers and
     ;;; shifted right by the fixnum shift; the loop moves one byte, bumps both
     ;;; indices with LEA, and repeats while subtracting '1 from NBYTES leaves
     ;;; a non-negative result (JGE).  DEST is returned in arg_z.
     ;;; NOTE(review): this is a diff rendering.  Lines carrying only an
     ;;; old-revision number (100-117, 119-169) appear to be removed r14710
     ;;; text -- the rest of the old -WORDS body and the whole old -BYTES
     ;;; function -- interleaved with the added lines (215-234).
     214(defx86lapfunction %copy-ivector-to-ivector-postincrement-8bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
    99215  (let ((rsrc temp0)
    100          (ridx imm1)
    101          (rval imm0))
    102     (xorl (%l ridx) (%l ridx))
     216        (srcidx imm0)
     217        (destidx imm1)
     218        (data imm2))
    103219    (movq (@ src (% rsp)) (% rsrc))
    104     (jmp @word-test)
    105     @word-loop
    106     (movq (@ x8664::misc-data-offset (% rsrc) (% ridx)) (% rval))
    107     (movq (% rval) (@ x8664::misc-data-offset (% dest) (% ridx)))
    108     (addq ($ 8) (% ridx))
    109     @word-test
    110     (cmpq (% ridx) (% nwords))
    111     (jne @word-loop)
    112     (jmp @byte-test)
    113     @byte-loop
    114     (movb (@ x8664::misc-data-offset (% rsrc) (% ridx)) (%b rval))
    115     (movb (%b rval) (@ x8664::misc-data-offset (% dest) (% ridx)))
    116     (addq ($ 1) (% ridx))
    117     @byte-test
     220    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     221    (sarq ($ target::fixnumshift) (% srcidx))
     222    (movq (% dest-byte-offset) (% destidx))
     223    (sarq ($ target::fixnumshift) (% destidx))
     224    (jmp @test)
     225    @loop
     226    (movzbl (@ target::misc-data-offset (% rsrc) (% srcidx)) (%l data))
     227    (movb (%b data) (@ target::misc-data-offset (% dest) (% destidx)))
     228    (lea (@ 1 (% destidx)) (% destidx))
     229    (lea (@ 1 (% srcidx)) (% srcidx))
     230    @test
    118231    (subq ($ '1) (% nbytes))
    119     (jns @byte-loop)
    120     (movq (% dest) (% arg_z))
    121     (single-value-return 3)))
    122          
    123    
    124    
    125 
    126 (defx86lapfunction %copy-ivector-to-ivector-bytes ((src-offset 16)
    127                                                    (src-byte-offset 8)
    128                                                    #|(ra 0)|#
    129                                                    (dest arg_x)
    130                                                    (dest-byte-offset arg_y)
    131                                                    (nbytes arg_z))
    132   (let ((rsrc temp0)
    133         (rsrc-byte-offset temp1))
    134     (movq (@ src-byte-offset (% rsp)) (% rsrc-byte-offset))
    135     (movq (@ src-offset (% rsp)) (% rsrc))
    136     (cmpq (% dest) (% rsrc))
    137     (jne @front)
    138     (cmpq (% rsrc-byte-offset) (% dest-byte-offset))
    139     (jg @back)
    140     @front
    141     (testq (% nbytes) (% nbytes))
    142     (jmp @front-test)
    143     @front-loop
    144     (unbox-fixnum rsrc-byte-offset imm0)
    145     (addq ($ '1) (% rsrc-byte-offset))
    146     (movb (@ x8664::misc-data-offset (% rsrc) (% imm0)) (%b imm0))
    147     (unbox-fixnum dest-byte-offset imm1)
    148     (addq ($ '1) (% dest-byte-offset))
    149     (movb (%b imm0) (@ x8664::misc-data-offset (% dest) (% imm1)))
    150     (subq ($ '1) (% nbytes))
    151     @front-test
    152     (jne @front-loop)
    153     (movq (% dest) (% arg_z))
    154     (single-value-return 4)
    155     @back
    156     (addq (% nbytes) (% rsrc-byte-offset))
    157     (addq (% nbytes) (% dest-byte-offset))
    158     (testq (% nbytes) (% nbytes))
    159     (jmp @back-test)
    160     @back-loop
    161     (subq ($ '1) (% rsrc-byte-offset))
    162     (unbox-fixnum rsrc-byte-offset imm0)
    163     (movb (@ x8664::misc-data-offset (% rsrc) (% imm0)) (%b imm0))
    164     (subq ($ '1) (% dest-byte-offset))
    165     (unbox-fixnum dest-byte-offset imm1)
    166     (subq ($ '1) (% nbytes))
    167     (movb (%b imm0) (@ x8664::misc-data-offset (% dest) (% imm1)))
    168     @back-test
    169     (jne @back-loop)
     232    (jge @loop)
    170233    (movq (% dest) (% arg_z))
    171234    (single-value-return 4)))
    172  
     235
     236
     237
     ;;; Byte-at-a-time descending copy.  SRC and SRC-BYTE-OFFSET are read from
     ;;; the stack (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES arrive in
     ;;; arg_x, arg_y and arg_z.  Both indices are decremented BEFORE each
     ;;; access, so the offsets passed in are one past the last byte to copy.
     ;;; DEST is returned in arg_z.
     238(defx86lapfunction %copy-ivector-to-ivector-predecrement-8bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     239  (let ((rsrc temp0)
     240        (srcidx imm0)
     241        (destidx imm1)
     242        (data imm2))
     243    (movq (@ src (% rsp)) (% rsrc))
     ;; Copy the two offsets into imm registers and shift out the fixnum tag.
     244    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     245    (sarq ($ target::fixnumshift) (% srcidx))
     246    (movq (% dest-byte-offset) (% destidx))
     247    (sarq ($ target::fixnumshift) (% destidx))
     248    (jmp @test)
     249    @loop
     250    (lea (@ -1 (% destidx)) (% destidx))
     251    (lea (@ -1 (% srcidx)) (% srcidx))
     252    (movzbl (@ target::misc-data-offset (% rsrc) (% srcidx)) (%l data))
     253    (movb (%b data) (@ target::misc-data-offset (% dest) (% destidx)))
     254    @test
     ;; Count down first; loop while the remaining count is still >= 0.
     255    (subq ($ '1) (% nbytes))
     256    (jge @loop)
     257    (movq (% dest) (% arg_z))
     258    (single-value-return 4)))
     259
     260
     ;;; Ascending copy, 4 bytes per iteration.  SRC and SRC-BYTE-OFFSET are
     ;;; read from the stack (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES
     ;;; arrive in arg_x, arg_y and arg_z.  The loop runs while subtracting '4
     ;;; from NBYTES leaves a non-negative result, so any remainder smaller
     ;;; than 4 bytes is not copied here.  DEST is returned in arg_z.
     261(defx86lapfunction %copy-ivector-to-ivector-postincrement-32bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     262  (let ((rsrc temp0)
     263        (srcidx imm0)
     264        (destidx imm1)
     265        (data imm2))
     266    (movq (@ src (% rsp)) (% rsrc))
     ;; Copy the two offsets into imm registers and shift out the fixnum tag.
     267    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     268    (sarq ($ target::fixnumshift) (% srcidx))
     269    (movq (% dest-byte-offset) (% destidx))
     270    (sarq ($ target::fixnumshift) (% destidx))
     271    (jmp @test)
     272    @loop
     273    (movl (@ target::misc-data-offset (% rsrc) (% srcidx)) (%l data))
     274    (movl (%l data) (@ target::misc-data-offset (% dest) (% destidx)))
     275    (lea (@ 4 (% destidx)) (% destidx))
     276    (lea (@ 4 (% srcidx)) (% srcidx))
     277    @test
     278    (subq ($ '4) (% nbytes))
     279    (jge @loop)
     280    (movq (% dest) (% arg_z))
     281    (single-value-return 4)))
     282
     ;;; Descending copy, 4 bytes per iteration.  SRC and SRC-BYTE-OFFSET are
     ;;; read from the stack (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES
     ;;; arrive in arg_x, arg_y and arg_z.  Both indices are decremented by 4
     ;;; BEFORE each access, so the offsets passed in are one past the last
     ;;; byte to copy.  DEST is returned in arg_z.
     283(defx86lapfunction %copy-ivector-to-ivector-predecrement-32bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     284  (let ((rsrc temp0)
     285        (srcidx imm0)
     286        (destidx imm1)
     287        (data imm2))
     288    (movq (@ src (% rsp)) (% rsrc))
     ;; Copy the two offsets into imm registers and shift out the fixnum tag.
     289    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     290    (sarq ($ target::fixnumshift) (% srcidx))
     291    (movq (% dest-byte-offset) (% destidx))
     292    (sarq ($ target::fixnumshift) (% destidx))
     293    (jmp @test)
     294    @loop
     295    (lea (@ -4 (% destidx)) (% destidx))
     296    (lea (@ -4 (% srcidx)) (% srcidx))
     297    (movl (@ target::misc-data-offset (% rsrc) (% srcidx)) (%l data))
     298    (movl (%l data) (@ target::misc-data-offset (% dest) (% destidx)))
     299    @test
     ;; Count down first; loop while the remaining count is still >= 0.
     300    (subq ($ '4) (% nbytes))
     301    (jge @loop)
     302    (movq (% dest) (% arg_z))
     303    (single-value-return 4)))
     304
     ;;; Ascending copy in 8-byte words, two words (16 bytes) per loop
     ;;; iteration.  SRC and SRC-BYTE-OFFSET are read from the stack
     ;;; (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES arrive in arg_x,
     ;;; arg_y and arg_z.  The source index lives in temp1 and the destination
     ;;; index reuses the DEST-BYTE-OFFSET register, which is shifted in place.
     ;;; DEST is returned in arg_z.
     305(defx86lapfunction %copy-ivector-to-ivector-postincrement-64bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     306  (let ((rsrc temp0)
     307        (srcidx temp1)
     308        (destidx dest-byte-offset)
     309        (data0 imm0)
     310        (data1 imm1))
     311    (movq (@ src (% rsp)) (% rsrc))
     312    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     313    ;; srcidx and destidx are multiples of 8, so it's safe to right-shift
     314    ;; them here (they remain fixnums).
     315    (sarq ($ target::word-shift) (% srcidx))
     316    (sarq ($ target::word-shift) (% destidx))
     ;; If the '8 bit of NBYTES is set, copy a single 8-byte word up front
     ;; and subtract it, so that the main loop can step 16 bytes at a time.
     317    (testq ($ '8) (% nbytes))
     318    (jz @test)
     319    (movq (@ target::misc-data-offset (% rsrc) (% srcidx)) (% data0))
     320    (movq (% data0) (@ target::misc-data-offset (% dest) (% destidx)))
     321    (lea (@ 8 (% destidx)) (% destidx))
     322    (lea (@ 8 (% srcidx)) (% srcidx))
     323    (subq ($ '8) (% nbytes))   
     324    (jmp @test)
     325    @loop
     ;; Both words are loaded before either is stored.
     326    (movq (@ target::misc-data-offset (% rsrc) (% srcidx)) (% data0))
     327    (movq (@ (+ 8 target::misc-data-offset) (% rsrc) (% srcidx)) (% data1))
     328    (movq (% data0) (@ target::misc-data-offset (% dest) (% destidx)))
     329    (movq (% data1) (@ (+ 8 target::misc-data-offset) (% dest) (% destidx)))
     330    (lea (@ 16 (% destidx)) (% destidx))
     331    (lea (@ 16 (% srcidx)) (% srcidx))
     332    @test
     333    (subq ($ '16) (% nbytes))
     334    (jge @loop)
     335    (movq (% dest) (% arg_z))
     336    (single-value-return 4)))
     337
     ;;; Descending copy in 8-byte words, two words (16 bytes) per loop
     ;;; iteration.  SRC and SRC-BYTE-OFFSET are read from the stack
     ;;; (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES arrive in arg_x,
     ;;; arg_y and arg_z.  Indices are decremented BEFORE each access, so the
     ;;; offsets passed in are one past the last byte to copy.  The
     ;;; destination index reuses the DEST-BYTE-OFFSET register, which is
     ;;; shifted in place.  DEST is returned in arg_z.
     338(defx86lapfunction %copy-ivector-to-ivector-predecrement-64bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     339  (let ((rsrc temp0)
     340        (srcidx temp1)
     341        (destidx dest-byte-offset)
     342        (data0 imm0)
     343        (data1 imm1))
     344    (movq (@ src (% rsp)) (% rsrc))
     345    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     346    ;; srcidx and destidx are multiples of 8, so it's safe to right-shift
     347    ;; them here (they remain fixnums).
     348    (sarq ($ target::word-shift) (% srcidx))
     349    (sarq ($ target::word-shift) (% destidx))
     ;; If the '8 bit of NBYTES is set, copy a single 8-byte word up front
     ;; and subtract it, so that the main loop can step 16 bytes at a time.
     350    (testq ($ '8) (% nbytes))
     351    (jz @test)
     352    (lea (@ -8 (% destidx)) (% destidx))
     353    (lea (@ -8 (% srcidx)) (% srcidx))
     354    (movq (@ target::misc-data-offset (% rsrc) (% srcidx)) (% data0))
     355    (movq (% data0) (@ target::misc-data-offset (% dest) (% destidx)))
     356    (subq ($ '8) (% nbytes))   
     357    (jmp @test)
     358    @loop
     359    (lea (@ -16 (% destidx)) (% destidx))
     360    (lea (@ -16 (% srcidx)) (% srcidx))
     ;; Both words are loaded before either is stored.
     361    (movq (@ target::misc-data-offset (% rsrc) (% srcidx)) (% data0))
     362    (movq (@ (+ 8 target::misc-data-offset) (% rsrc) (% srcidx)) (% data1))
     363    (movq (% data0) (@ target::misc-data-offset (% dest) (% destidx)))
     364    (movq (% data1) (@ (+ 8 target::misc-data-offset) (% dest) (% destidx)))
     365    @test
     366    (subq ($ '16) (% nbytes))
     367    (jge @loop)
     368    (movq (% dest) (% arg_z))
     369    (single-value-return 4)))
     370
     ;;; Ascending copy in 16-byte units using xmm0-xmm3: 64 bytes per main
     ;;; loop iteration, then up to three single 16-byte moves for what is
     ;;; left.  SRC and SRC-BYTE-OFFSET are read from the stack
     ;;; (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES arrive in arg_x,
     ;;; arg_y and arg_z.  DEST is returned in arg_z.
     ;;; Every access is a MOVDQA, which requires a 16-byte-aligned memory
     ;;; operand; nothing here checks alignment, so the caller must supply
     ;;; offsets that make the effective addresses aligned.
     ;;; NOTE(review): NBYTES is presumably a multiple of 16 on entry -- the
     ;;; tail dispatch below only distinguishes 0, below '32, '32 and above.
     371(defx86lapfunction %copy-ivector-to-ivector-postincrement-128bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     372  (let ((rsrc temp0)
     373        (srcidx imm0)
     374        (destidx imm1))
     375    (movq (@ src (% rsp)) (% rsrc))
     376    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     377    (sarq ($ target::fixnumshift) (% srcidx))
     378    (movq (% dest-byte-offset) (% destidx))
     379    (sarq ($ target::fixnumshift) (% destidx))
     380    (jmp @test)
     381    @loop
     ;; All four 16-byte loads are issued before any of the stores.
     382    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     383    (movdqa (@ (+ 16 target::misc-data-offset) (% rsrc) (% srcidx)) (% xmm1))
     384    (movdqa (@ (+ 32 target::misc-data-offset) (% rsrc) (% srcidx)) (% xmm2))
     385    (movdqa (@ (+ 48 target::misc-data-offset) (% rsrc) (% srcidx)) (% xmm3))
     386    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     387    (movdqa (% xmm1) (@ (+ 16 target::misc-data-offset) (% dest) (% destidx)))
     388    (movdqa (% xmm2) (@ (+ 32 target::misc-data-offset) (% dest) (% destidx)))
     389    (movdqa (% xmm3) (@ (+ 48 target::misc-data-offset) (% dest) (% destidx)))
     390    (lea (@ 64 (% destidx)) (% destidx))
     391    (lea (@ 64 (% srcidx)) (% srcidx))
     392    (subq ($ '64) (% nbytes))
     393    @test
     394    (cmpq ($ '64) (% nbytes))
     395    (jge @loop)
     ;; Fewer than '64 remain.  Zero: done.  Equal to '32: two moves (@two).
     ;; Less than '32: one move (@one).  Otherwise fall through and do three.
     396    (testq (% nbytes) (% nbytes))
     397    (je @done)
     398    (cmpq ($ '32) (% nbytes))
     399    (je @two)
     400    (jl @one)
     401    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     402    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     403    (lea (@ 16 (% srcidx)) (% srcidx))
     404    (lea (@ 16 (% destidx)) (% destidx))
     405    @two
     406    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     407    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     408    (lea (@ 16 (% srcidx)) (% srcidx))
     409    (lea (@ 16 (% destidx)) (% destidx))
     410    @one
     411    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     412    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     413    @done
     414    (movq (% dest) (% arg_z))
     415    (single-value-return 4)))
     416
     ;;; Descending copy in 16-byte units using xmm0-xmm3: 64 bytes per main
     ;;; loop iteration, then up to three single 16-byte moves for what is
     ;;; left.  SRC and SRC-BYTE-OFFSET are read from the stack
     ;;; (rsp-relative); DEST, DEST-BYTE-OFFSET and NBYTES arrive in arg_x,
     ;;; arg_y and arg_z.  Indices are decremented BEFORE each access, so the
     ;;; offsets passed in are one past the last byte to copy.  DEST is
     ;;; returned in arg_z.
     ;;; Every access is a MOVDQA, which requires a 16-byte-aligned memory
     ;;; operand; nothing here checks alignment, so the caller must supply
     ;;; end offsets that make the effective addresses aligned.
     ;;; NOTE(review): NBYTES is presumably a multiple of 16 on entry -- the
     ;;; tail dispatch below only distinguishes 0, below '32, '32 and above.
     417(defx86lapfunction %copy-ivector-to-ivector-predecrement-128bit ((src 16) (src-byte-offset 8) #||(ra 0)||# (dest arg_x) (dest-byte-offset arg_y) (nbytes arg_z))
     418  (let ((rsrc temp0)
     419        (srcidx imm0)
     420        (destidx imm1))
     421    (movq (@ src (% rsp)) (% rsrc))
     422    (movq (@ src-byte-offset (% rsp)) (% srcidx))
     423    (sarq ($ target::fixnumshift) (% srcidx))
     424    (movq (% dest-byte-offset) (% destidx))
     425    (sarq ($ target::fixnumshift) (% destidx))
     426    (jmp @test)
     427    @loop
     428    (lea (@ -64 (% destidx)) (% destidx))
     429    (lea (@ -64 (% srcidx)) (% srcidx))
     ;; All four 16-byte loads are issued before any of the stores.
     430    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     431    (movdqa (@ (+ 16 target::misc-data-offset) (% rsrc) (% srcidx)) (% xmm1))
     432    (movdqa (@ (+ 32 target::misc-data-offset) (% rsrc) (% srcidx)) (% xmm2))
     433    (movdqa (@ (+ 48 target::misc-data-offset) (% rsrc) (% srcidx)) (% xmm3))
     434    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     435    (movdqa (% xmm1) (@ (+ 16 target::misc-data-offset) (% dest) (% destidx)))
     436    (movdqa (% xmm2) (@ (+ 32 target::misc-data-offset) (% dest) (% destidx)))
     437    (movdqa (% xmm3) (@ (+ 48 target::misc-data-offset) (% dest) (% destidx)))
     438    (subq ($ '64) (% nbytes))
     439    @test
     440    (cmpq ($ '64) (% nbytes))
     441    (jge @loop)
     ;; Fewer than '64 remain.  Zero: done.  Equal to '32: two moves (@two).
     ;; Less than '32: one move (@one).  Otherwise fall through and do three.
     442    (testq (% nbytes) (% nbytes))
     443    (je @done)
     444    (cmpq ($ '32) (% nbytes))
     445    (je @two)
     446    (jl @one)
     447    (lea (@ -16 (% srcidx)) (% srcidx))
     448    (lea (@ -16 (% destidx)) (% destidx))
     449    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     450    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     451    @two
     452    (lea (@ -16 (% srcidx)) (% srcidx))
     453    (lea (@ -16 (% destidx)) (% destidx))
     454    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     455    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     456    @one
     457    (lea (@ -16 (% srcidx)) (% srcidx))
     458    (lea (@ -16 (% destidx)) (% destidx))
     459    (movdqa (@ target::misc-data-offset (% rsrc) (% srcidx)) (% xmm0))
     460    (movdqa (% xmm0) (@ target::misc-data-offset (% dest) (% destidx)))
     461    @done
     462    (movq (% dest) (% arg_z))
     463    (single-value-return 4)))
     464
    173465
    174466(defx86lapfunction %copy-gvector-to-gvector ((src (* 2 x8664::node-size))
Note: See TracChangeset for help on using the changeset viewer.