Changeset 13423
- Timestamp: Jan 30, 2010, 3:18:00 AM (15 years ago)
- File: 1 edited
trunk/source/level-0/X86/X8664/x8664-bignum.lisp (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
trunk/source/level-0/X86/X8664/x8664-bignum.lisp
r13420 r13423 338 338 (movq (@ n (% rsp)) (% imm0)) 339 339 (shrq (% imm0)) 340 (jmp @test) 341 @loop 340 (testl ($ 4) (%l imm0)) 341 (je @check128) 342 (subq ($ 4) (% imm0)) 342 343 (movl (@ x8664::misc-data-offset (% a) (% imm0)) (%l imm1)) 343 344 (orl (@ x8664::misc-data-offset (% b) (% imm0)) (%l imm1)) 344 345 (movl (%l imm1) (@ x8664::misc-data-offset (% c) (% imm0))) 345 (subq ($ 4) (% imm0)) 346 @test 347 (jne @loop) 346 (jmp @check128) 347 @loop64 348 (movq (@ x8664::misc-data-offset (% a) (% imm0)) (% imm1)) 349 (orq (@ x8664::misc-data-offset (% b) (% imm0)) (% imm1)) 350 (movq (% imm1) (@ x8664::misc-data-offset (% c) (% imm0))) 351 @test64 352 (subq ($ 8) (% imm0)) 353 (jge @loop64) 354 (single-value-return 3) 355 ;; See if we can do some of this using the SSE2 hardware. 356 ;; That's only possible if we have 6 or more words. 357 @check128 358 (rcmpq (% imm0) ($ (* 6 4))) 359 (jl @test64) 360 ;; We'll have to do the first 2 words in a 64-bit operation. 361 ;; If the total number of words is a multiple of 4, we have 362 ;; to do the last 2 words without using SSE2, as well. 
363 (testl ($ 8) (%l imm0)) 364 (jne @test128) 365 (movq (@ (- x8664::misc-data-offset 8) (% a) (% imm0)) (% imm1)) 366 (orq (@ (- x8664::misc-data-offset 8) (% b) (% imm0)) (% imm1)) 367 (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c) (% imm0))) 368 (subq ($ (+ 16 8)) (% imm0)) 369 @loop128 370 (movaps (@ x8664::misc-data-offset (% a) (% imm0)) (% xmm0)) 371 (por (@ x8664::misc-data-offset (% b) (% imm0)) (% xmm0)) 372 (movaps (% xmm0) (@ x8664::misc-data-offset (% c) (% imm0))) 373 @test128 374 (subq ($ 16) (% imm0)) 375 (jg @loop128) 376 (movq (@ (- x8664::misc-data-offset 8) (% a)) (% imm1)) 377 (orq (@ (- x8664::misc-data-offset 8) (% b)) (% imm1)) 378 (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c))) 348 379 (single-value-return 3)) 349 380 … … 356 387 (movq (@ n (% rsp)) (% imm0)) 357 388 (shrq (% imm0)) 358 (jmp @test) 359 @loop 389 (testl ($ 4) (%l imm0)) 390 (je @check128) 391 (subq ($ 4) (% imm0)) 360 392 (movl (@ x8664::misc-data-offset (% a) (% imm0)) (%l imm1)) 361 393 (andl (@ x8664::misc-data-offset (% b) (% imm0)) (%l imm1)) 362 394 (movl (%l imm1) (@ x8664::misc-data-offset (% c) (% imm0))) 363 (subq ($ 4) (% imm0)) 364 @test 365 (jne @loop) 395 (jmp @check128) 396 @loop64 397 (movq (@ x8664::misc-data-offset (% a) (% imm0)) (% imm1)) 398 (andq (@ x8664::misc-data-offset (% b) (% imm0)) (% imm1)) 399 (movq (% imm1) (@ x8664::misc-data-offset (% c) (% imm0))) 400 @test64 401 (subq ($ 8) (% imm0)) 402 (jge @loop64) 403 (single-value-return 3) 404 ;; See if we can do some of this using the SSE2 hardware. 405 ;; That's only possible if we have 6 or more words. 406 @check128 407 (rcmpq (% imm0) ($ (* 6 4))) 408 (jl @test64) 409 ;; We'll have to do the first 2 words in a 64-bit operation. 410 ;; If the total number of words is a multiple of 4, we have 411 ;; to do the last 2 words without using SSE2, as well. 
412 (testl ($ 8) (%l imm0)) 413 (jne @test128) 414 (movq (@ (- x8664::misc-data-offset 8) (% a) (% imm0)) (% imm1)) 415 (andq (@ (- x8664::misc-data-offset 8) (% b) (% imm0)) (% imm1)) 416 (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c) (% imm0))) 417 (subq ($ (+ 16 8)) (% imm0)) 418 @loop128 419 (movaps (@ x8664::misc-data-offset (% a) (% imm0)) (% xmm0)) 420 (pand (@ x8664::misc-data-offset (% b) (% imm0)) (% xmm0)) 421 (movaps (% xmm0) (@ x8664::misc-data-offset (% c) (% imm0))) 422 @test128 423 (subq ($ 16) (% imm0)) 424 (jg @loop128) 425 (movq (@ (- x8664::misc-data-offset 8) (% a)) (% imm1)) 426 (and (@ (- x8664::misc-data-offset 8) (% b)) (% imm1)) 427 (movq (% imm1) (@ (- x8664::misc-data-offset 8) (% c))) 366 428 (single-value-return 3)) 367 429
Note: See TracChangeset for help on using the changeset viewer.
