Index: /branches/x8664-call/ccl/compiler/X86/x86-lap.lisp
===================================================================
--- /branches/x8664-call/ccl/compiler/X86/x86-lap.lisp	(revision 6312)
+++ /branches/x8664-call/ccl/compiler/X86/x86-lap.lisp	(revision 6313)
@@ -344,8 +344,38 @@
     (new-frag frag-list)))
 
+;;; Make the current frag be of type :talign; set that frag-type's
+;;; argument to NIL initially.  Start a new frag of type :pending-talign;
+;;; that frag will contain at most one instruction.  When an
+;;; instuction is ouput in the pending-talign frag, adjust the preceding
+;;; :talign frag's argument and set the type of the :pending-talign
+;;; frag to NIL.  (The :talign frag will have 0-7 NOPs of some form
+;;; appended to it, so the first instruction in the successor will end
+;;; on an address that matches the argument below.)
+;;; That instruction can not be a relaxable branch.
+(defun finish-frag-for-talign (frag-list arg)
+  (let* ((current (frag-list-current frag-list))
+         (new (new-frag frag-list)))
+    (setf (frag-type current) (list :talign nil))
+    (setf (frag-type new) (list :pending-talign arg))))
+
+;;; Having generated an instruction in a :pending-talign frag, set the
+;;; frag-type argument of the preceding :talign frag to the :pendint-talign
+;;; frag's argument - the length of the pending-talign's first instruction
+;;; mod 8, and clear the type of the "pending" frag.
+;;; cadr of the frag-type 
+(defun finish-pending-talign-frag (frag-list)
+  (let* ((frag (frag-list-current frag-list))
+         (pred (frag-pred frag))
+         (arg (cadr (frag-type frag)))
+         (pred-arg (frag-type pred)))
+    (setf (cadr pred-arg) (logand 7 (- arg (frag-length frag)))
+          (frag-type frag) nil)
+    (new-frag frag-list)))
+
 (defun finish-frag-for-org (frag-list org)
   (let* ((frag (frag-list-current frag-list)))
     (setf (frag-type frag) (list :org org))
     (new-frag frag-list)))
+
 
 (defun lookup-x86-register (regname designator)
@@ -968,5 +998,8 @@
                    (if (logtest optype (x86::encode-operand-type :imm64))
                      (frag-list-push-64 frag-list val)
-                     (frag-list-push-32 frag-list val))))))))))))
+                     (frag-list-push-32 frag-list val))))))))))
+    (let* ((frag (frag-list-current frag-list)))
+      (if (eq (car (frag-type frag)) :pending-talign)
+        (finish-pending-talign-frag frag-list)))))
 
 (defun x86-lap-directive (frag-list directive arg)
@@ -997,4 +1030,5 @@
                 (:quad (frag-list-push-64 frag-list val))
                 (:align (finish-frag-for-align frag-list val))
+                (:talign (finish-frag-for-talign frag-list val))
                 (:org (finish-frag-for-org frag-list val))))
             (let* ((pos (frag-list-position frag-list))
@@ -1010,5 +1044,6 @@
                 (:quad (frag-list-push-64 frag-list 0)
                        (setq reloctype :expr64))
-                (:align (error ":align expression ~s not constant" arg)))
+                (:align (error ":align expression ~s not constant" arg))
+                (:talign (error ":talign expression ~s not constant" arg)))
               (when reloctype
                 (push
@@ -1062,4 +1097,10 @@
     (- (logandc2 (+ address mask) mask) address)))
 
+(defun relax-talign (address mask)
+  (do* ((i 0 (1+ i)))
+       ((= (logand address 7) mask) i)
+    (incf address)))
+
+
 (defun relax-frag-list (frag-list)
   ;; First, assign tentative addresses to all frags, assuming that
@@ -1082,4 +1123,10 @@
                 (:align
                  (incf address (relax-align address (cadr (frag-type frag)))))
+                (:talign
+                 (let* ((arg (cadr (frag-type frag))))
+                   (if (null arg)
+                     ;;; Never generated code in :pending-talign frag
+                     (setf (frag-type frag) nil)
+                     (incf address (relax-talign address arg)))))
                 ((:assumed-short-branch :assumed-short-conditional-branch)
                  (destructuring-bind (label pos reloc) (cdr (frag-type frag))
@@ -1132,4 +1179,10 @@
                     (oldoff (relax-align (+ was-address len) bits))
                     (newoff (relax-align (+ address len) bits)))
+               (setq growth (- newoff oldoff))))
+            (:talign
+             (let* ((arg (cadr fragtype))
+                    (len (frag-length frag))
+                    (oldoff (relax-talign (+ was-address len) arg))
+                    (newoff (relax-talign (+ address len) arg)))
                (setq growth (- newoff oldoff))))
             ;; If we discover - on any iteration - that a short
@@ -1207,4 +1260,16 @@
                              
 
+(defun frag-emit-nops (frag count)
+  (let* ((nnops (ash (+ count 3) -2))
+         (len (floor count nnops))
+         (remains (- count (* nnops len))))
+    (dotimes (i remains)
+      (dotimes (k len) (frag-push-byte frag #x66))
+      (frag-push-byte frag #x90))
+    (do* ((i remains (1+ i)))
+         ((= i nnops))
+      (dotimes (k (1- len)) (frag-push-byte frag #x66))
+      (frag-push-byte frag #x90))))
+  
 (defun fill-for-alignment (frag-list)
   (ccl::do-dll-nodes (frag frag-list)
@@ -1215,5 +1280,7 @@
                (pad (- nextaddr (+ addr (frag-length frag)))))
           (unless (eql 0 pad)
-            (dotimes (i pad) (frag-push-byte frag #xcc))))))))
+            (if (eq (car (frag-type frag)) :talign)
+              (frag-emit-nops frag pad)
+              (dotimes (i pad) (frag-push-byte frag #xcc)))))))))
 
 (defun show-frag-bytes (frag-list)
