Index: /trunk/source/compiler/X86/x86-asm.lisp
===================================================================
--- /trunk/source/compiler/X86/x86-asm.lisp	(revision 15155)
+++ /trunk/source/compiler/X86/x86-asm.lisp	(revision 15156)
@@ -1853,10 +1853,18 @@
 
    ;; movdqa
-   (def-x86-opcode (movdqa :cpu64)  ((:regxmm :insert-xmm-reg) (:anymem :insert-memory))
+   (def-x86-opcode movdqa  ((:regxmm :insert-xmm-reg) (:anymem :insert-memory)) ; xmm -> memory form (66 0F 7F); the :cpu64 qualifier is dropped here
     #x0f7f #o300 #x0 #x66)
-   (def-x86-opcode (movdqa :cpu64) ((:anymem :insert-memory) (:regxmm :insert-xmm-reg)) 
+   (def-x86-opcode movdqa ((:anymem :insert-memory) (:regxmm :insert-xmm-reg))  ; memory -> xmm form (66 0F 6F); the :cpu64 qualifier is dropped here
     #x0f6f #o000 #x0 #x66)
     
-
+   (def-x86-opcode movdqu  ((:regxmm :insert-xmm-reg) (:anymem :insert-memory)) ; xmm -> memory form; same opcode bytes as movdqa but with the F3 prefix
+     #x0f7f #o300 #x0 #xf3)
+   (def-x86-opcode movdqu ((:anymem :insert-memory) (:regxmm :insert-xmm-reg))  ; memory -> xmm form; unaligned counterpart of the movdqa load
+     #x0f6f #o000 #x0 #xf3)
+    
+
+   ;; sign-extending mov -- NOTE(review): the lines right after this addition appear to begin an identical movsbl entry; confirm this copy is intentional
+   (def-x86-opcode movsbl ((:reg8 :insert-modrm-rm) (:reg32 :insert-modrm-reg))
+     #x0fbe #o300 0)
    ;; sign-extending mov
    (def-x86-opcode movsbl ((:reg8 :insert-modrm-rm) (:reg32 :insert-modrm-reg))
Index: /trunk/source/compiler/X86/x862.lisp
===================================================================
--- /trunk/source/compiler/X86/x862.lisp	(revision 15155)
+++ /trunk/source/compiler/X86/x862.lisp	(revision 15156)
@@ -10597,5 +10597,8 @@
                 (x862-one-targeted-reg-form seg other other-reg)
                 (! %natural-logand-c  other-reg constant)
-                (<- other-reg))))
+                (if (and (typep constant *nx-target-fixnum-type*)
+                         (node-reg-p vreg))
+                  (! box-fixnum vreg other-reg)
+                  (<- other-reg)))))
           (^))))))
 
Index: /trunk/source/level-0/X86/X8632/x8632-misc.lisp
===================================================================
--- /trunk/source/level-0/X86/X8632/x8632-misc.lisp	(revision 15155)
+++ /trunk/source/level-0/X86/X8632/x8632-misc.lisp	(revision 15156)
@@ -21,4 +21,55 @@
 ;;; a byte at a time.
 ;;; Does no arg checking of any kind.  Really.
+(defun %copy-ptr-to-ivector (src src-byte-offset dest dest-byte-offset nbytes) ; alignment dispatcher: picks the 32-bit or the 8-bit copier, returns DEST
+  (declare (fixnum src-byte-offset dest-byte-offset nbytes)
+           (optimize (speed 3) (safety 0))) ; compiled without safety checks; arguments are trusted
+  (let* ((ptr-align (logand 7 (%ptr-to-int src)))) ; low 3 bits of the source pointer's address
+    (declare (type (mod 8) ptr-align))
+    (if (and (= 0 (logand nbytes 3)) ; byte count is a multiple of 4
+             (= 0 (logand dest-byte-offset 3)) ; destination offset is a multiple of 4
+             (= 0 (logand (the fixnum (+ ptr-align src-byte-offset)) 3))) ; source address plus offset is 4-byte aligned
+      (%copy-ptr-to-ivector-32bit src src-byte-offset dest dest-byte-offset nbytes) ; all three hold: copy in 16- and 4-byte units
+      (%copy-ptr-to-ivector-8bit src src-byte-offset dest dest-byte-offset nbytes)) ; otherwise fall back to the -8bit variant
+    dest)) ; the result of either copier is ignored; DEST is always returned
+
+;;; %COPY-PTR-TO-IVECTOR only calls this when NBYTES, DEST-BYTE-OFFSET and the effective source
+;;; address are multiples of 4, so DEST-BYTE-OFFSET is still a fixnum when unboxed.
+(defx8632lapfunction %copy-ptr-to-ivector-32bit ((psrc 12) ; stack offset of SRC (a macptr)
+                                                 (psrc-byte-offset 8) ; stack offset of the boxed source byte offset
+                                                 (pdest 4) ; stack offset of DEST
+                                                 #|(ra 0)|#
+                                                 (dest-byte-offset arg_y) ; boxed on entry, unboxed below
+                                                 (nbytes arg_z)) ; stays boxed: compared and decremented with quoted constants
+
+  (let ((foreign-ptr imm0)		;raw foreign pointer
+	(ivector temp1))                ;destination ivector
+    (movl (@ psrc (% esp)) (% temp1)) ; temp1 = SRC macptr (temp1 is reused as IVECTOR below)
+    (movl (@ psrc-byte-offset (% esp)) (% foreign-ptr)) ; foreign-ptr = boxed source byte offset
+    (sarl ($ x8632::word-shift)(% foreign-ptr)) ; unbox the source offset
+    (addl (@ x8632::macptr.address (% temp1)) (% foreign-ptr)) ; foreign-ptr = macptr address + offset
+    (movl (@ pdest (% esp)) (% ivector)) ; ivector = DEST, replacing the macptr in temp1
+    (sarl ($ x8632::word-shift) (% dest-byte-offset)) ; unbox the destination offset
+    (jmp @test16) ; enter the 16-byte loop at its test so short copies skip the body
+    @loop16 ; copy 16 bytes per iteration
+    (movdqu (@ (% foreign-ptr)) (% xmm0)) ; unaligned 16-byte load from foreign memory
+    (movdqu (% xmm0) (@ x8632::misc-data-offset (% ivector) (% dest-byte-offset))) ; unaligned 16-byte store into the vector's data
+    (addl ($ 16) (% foreign-ptr)) ; advance the raw source address
+    (addl ($ 16) (% dest-byte-offset)) ; advance the unboxed destination offset
+    (subl ($ '16) (% nbytes)) ; nbytes -= 16, in boxed form
+    @test16
+    (cmpl ($ '16) (% nbytes)) ; at least 16 bytes left?
+    (jge @loop16)
+    (testl (% nbytes) (% nbytes)) ; nothing left: skip the 4-byte loop
+    (je @done)
+    @loop4 ; copy the tail 4 bytes at a time; terminates only if the remainder is a multiple of 4
+    (movd (@ (% foreign-ptr)) (% mm0)) ; 4-byte load through MMX register mm0 -- NOTE(review): confirm MMX/x87 state is handled elsewhere
+    (movd (% mm0) (@ x8632::misc-data-offset (% ivector) (% dest-byte-offset))) ; 4-byte store into the vector's data
+    (addl ($ 4) (% foreign-ptr)) ; advance the raw source address
+    (addl ($ 4) (% dest-byte-offset)) ; advance the unboxed destination offset
+    (subl ($ '4) (% nbytes)) ; nbytes -= 4, in boxed form; sets ZF when the count reaches zero
+    (jne @loop4)
+    @done
+    (movl (% ivector) (% arg_z)) ; return value = DEST
+    (single-value-return 5)))
 
 ;;; I went ahead and used the INC and DEC instructions here, since
@@ -29,10 +80,10 @@
 ;;; might be worth a try.
 
-(defx8632lapfunction %copy-ptr-to-ivector ((src 12)
-					   (src-byte-offset 8)
-					   (dest 4)
-					   #|(ra 0)|#
-					   (dest-byte-offset arg_y)
-					   (nbytes arg_z))
+(defx8632lapfunction %copy-ptr-to-ivector-8bit ((src 12)
+                                                (src-byte-offset 8)
+                                                (dest 4)
+                                                #|(ra 0)|#
+                                                (dest-byte-offset arg_y)
+                                                (nbytes arg_z))
   (mark-as-imm temp0)
   (mark-as-imm arg_y)
