Ignore:
Timestamp:
Oct 13, 2006, 1:45:47 AM (13 years ago)
Author:
gb
Message:

Handle newline translation differenly (at the character I/O level, not the buffer level).

All character encoding/decoding functions operate on octets, not necessarily code
units. (The stream encode/decode functions are an exception; serialization and
byte ordering are handled by the stream.)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/ccl/level-1/l1-sysio.lisp

    r5320 r5335  
    2020  (octet-pos 0 :type fixnum)            ; current io position in octets
    2121  (fileeof 0 :type fixnum)              ; file length in elements
    22   (input-filter nil)
    23   (output-filter nil)
    24   (line-termination :unix))
    25 
    26 
    27 (defun install-line-termination-filters (file-ioblock line-termination in-p out-p)
    28   (let* ((inferred-macos nil))
    29     (if (eq line-termination :inferred)
    30       (if in-p
    31         (if (eq (setq line-termination (infer-line-termination file-ioblock))
    32                 :macos)
    33           (setq inferred-macos t))
    34         (setq line-termination :unix)))
    35     (setf (file-ioblock-line-termination file-ioblock) line-termination)
    36     (when (eq line-termination :macos)
    37       (let* ((encoding (or (file-ioblock-encoding file-ioblock)
    38                            (get-character-encoding nil)))
    39              (element-size (character-encoding-code-unit-size encoding))
    40              (native-byte-order (ioblock-native-byte-order file-ioblock)))
    41         (when in-p
    42           (setf (file-ioblock-input-filter file-ioblock)
    43                 (case element-size
    44                   (8 'u8-translate-cr-to-lf)
    45                   (16 (if #+big-endian-target native-byte-order
    46                           #+little-endian-target (not native-byte-order)
    47                         'big-endian-u16-translate-cr-to-lf
    48                         'little-endian-swapped-u16-translate-cr-to-lf))
    49                   (32 (if #+big-endian-target native-byte-order
    50                           #+little-endian-target (not native-byte-order)
    51                         'big-endian-u32-translate-cr-to-lf
    52                         'little-endian-swapped-u32-translate-cr-to-lf))))
    53           (if inferred-macos
    54             (let* ((inbuf (file-ioblock-inbuf file-ioblock)))
    55               (funcall (file-ioblock-input-filter file-ioblock)
    56                        (io-buffer-buffer inbuf)
    57                        (io-buffer-count inbuf)))))
    58         (when out-p
    59           (setf (file-ioblock-output-filter file-ioblock)
    60                 (case element-size
    61                   (8 'u8-translate-lf-to-cr)
    62                   (16 (if native-byte-order
    63                         'u16-translate-lf-to-cr
    64                         'swapped-u16-translate-lf-to-cr))
    65                   (32 (if native-byte-order
    66                         'u32-translate-lf-to-cr
    67                         'swapped-u32-translate-lf-to-cr)))))
    68         line-termination))))
     22  )
     23
     24
     25
    6926
    7027;;; The file-ioblock-octet-pos field is the (octet) position
     
    8239        (file-octet-filepos file-ioblock)))
    8340
    84 (defun translate-cr-to-lf (file-ioblock)
    85   (let* ((inbuf (file-ioblock-inbuf file-ioblock))
    86          (string (io-buffer-buffer inbuf))
    87          (n (io-buffer-count inbuf)))
    88     (declare (simple-base-string string)
    89              (fixnum n))
    90     (dotimes (i n n)
    91       (if (eq (schar string i) #\Return)
    92         (setf (schar string i) #\Linefeed)))))
    93 
    94 (defun translate-lf-to-cr (file-ioblock n)
    95   (declare (fixnum n))
    96   (let* ((outbuf (file-ioblock-outbuf file-ioblock))
    97          (string (io-buffer-buffer outbuf)))
    98     (declare (simple-base-string string))
    99     (dotimes (i n n)
    100       (if (eq (schar string i) #\Linefeed)
    101         (setf (schar string i) #\Return)))))
    102 
    10341(defun infer-line-termination (file-ioblock)
    10442  (let* ((encoding (or (file-ioblock-encoding file-ioblock)
    10543                       (get-character-encoding nil)))
    106          (unit-size (character-encoding-code-unit-size encoding))
    107          (cr (char-code #\Return))
    108          (lf (char-code #\linefeed))
    10944         (inbuf (file-ioblock-inbuf file-ioblock))
    11045         (buffer (io-buffer-buffer inbuf))
    11146         (n (io-buffer-count inbuf)))
    112     (cond ((= unit-size 8)
    113            (if (zerop n)
    114              (setq n (fd-stream-advance (file-ioblock-stream file-ioblock)
    115                                         file-ioblock
    116                                         t)))
    117      
    118      
    119            (do* ((i 0 (+ i 1))
    120                  (code))
    121                 ((= i n) :unix)
    122              (setq code (aref buffer i))           
    123              (if (= code cr)
    124                (return :macos)
    125                (if (= code lf)
    126                  (return :unix))))))))
    127 
    128 
    129 (defvar *known-line-termination-formats* '(:unix :macos :inferred))
     47    (when (zerop n)
     48      (setq n (or (fd-stream-advance (file-ioblock-stream file-ioblock)
     49                                     file-ioblock
     50                                     t)
     51                  0)))
     52    (multiple-value-bind (nchars last)
     53        (funcall (character-encoding-length-of-vector-encoding-function encoding)
     54                 buffer
     55                 0
     56                 n)
     57      (declare (fixnum nchars last))
     58      (let* ((string (make-string nchars)))
     59        (declare (dynamic-extent string))
     60        (decode-character-encoded-vector encoding buffer 0 last string)
     61        (let* ((line-termination
     62                (do* ((i 0 (1+ i))
     63                      (last-was-cr nil))
     64                     ((= nchars) (if last-was-cr :cr))
     65                  (declare (fixnum i))
     66                  (let* ((char (schar string i)))
     67                    (if last-was-cr
     68                      (if (eq char #\Linefeed)
     69                        (return :crlf)
     70                        (return :cr))
     71                      (case char
     72                        (#\Newline (return nil))
     73                        (#\Line_Separator (return :unicode))
     74                        (#\Return (setq last-was-cr t))))))))
     75        (when line-termination
     76          (install-ioblock-input-line-termination file-ioblock line-termination)
     77          (when (file-ioblock-outbuf file-ioblock)
     78            (install-ioblock-output-line-termination file-ioblock line-termination))))))))
     79
     80
    13081
    13182(defvar *default-external-format* :unix)
     
    13990  "The value of this variable is used when :EXTERNAL-FORMAT is
    14091unspecified or specified as :DEFAULT. It can meaningfully be given any
    141 of the values :UNIX, :MACOS, :MSDOS or :INFERRED, each of which is
     92of the values :UNIX, :MACOS, :MSDOS, :UNICODE or :INFERRED, each of which is
    14293interpreted as described in the documentation.
    14394
     
    179130        ((lookup-character-encoding external-format)
    180131         (normalize-external-format domain `(:character-encoding ,external-format)))
    181         ((member external-format *known-line-termination-formats*)
     132        ((assq external-format *canonical-line-termination-conventions*)
    182133         (normalize-external-format domain `(:line-termination ,external-format)))
    183134        (t
     
    188139
    189140
    190 (defun file-stream-force-output (stream ioblock count finish-p)
    191   (let* ((filter (file-ioblock-output-filter ioblock)))
    192     (when filter
    193       (let* ((buffer (io-buffer-buffer (file-ioblock-outbuf ioblock))))
    194         (funcall filter buffer count)))
    195     (fd-stream-force-output stream ioblock count finish-p)))
     141
    196142
    197143;;; Establish a new position for the specified file-stream.
     
    255201          (progn
    256202            (when (file-ioblock-dirty file-ioblock)
    257               (file-stream-force-output (file-ioblock-stream file-ioblock)
    258                                         file-ioblock
    259                                         (io-buffer-count outbuf)
    260                                         nil)
     203              (fd-stream-force-output (file-ioblock-stream file-ioblock)
     204                                      file-ioblock
     205                                      (io-buffer-count outbuf)
     206                                      nil)
    261207              ;; May have just extended the file; may need to update
    262208              ;; fileeof.
     
    309255           (when (file-ioblock-dirty file-ioblock)
    310256             (file-ioblock-seek file-ioblock octet-base)
    311              (file-stream-force-output (file-ioblock-stream file-ioblock)
    312                                        file-ioblock
    313                                        (io-buffer-count outbuf)
    314                                        nil))
     257             (fd-stream-force-output (file-ioblock-stream file-ioblock)
     258                                     file-ioblock
     259                                     (io-buffer-count outbuf)
     260                                     nil))
    315261           (file-ioblock-seek-and-reset file-ioblock
    316262                                        (ioblock-elements-to-octets
     
    539485      nil)))
    540486
    541 ;;; Fill the input buffer, possibly doing newline translation.
    542 (defun file-stream-advance (stream file-ioblock read-p)
    543   (let* ((n (fd-stream-advance stream file-ioblock read-p))
    544          (filter (file-ioblock-input-filter file-ioblock)))
    545       (when (and filter n (> n 0))
    546         (let* ((buf (file-ioblock-inbuf file-ioblock))
    547                (vector (io-buffer-buffer buf)))
    548           (funcall filter vector n)))
    549       n))
     487
    550488 
    551489;;; If we've been reading, the file position where we're going
     
    560498      (break "Expected newpos to be ~d, fd is at ~d" newpos curpos))
    561499    (setf (file-ioblock-octet-pos file-ioblock) newpos)
    562     (file-stream-advance stream file-ioblock read-p)))
     500    (fd-stream-advance stream file-ioblock read-p)))
    563501
    564502;;; If the buffer's dirty, we have to back up and rewrite it before
     
    571509    (when (ioblock-dirty file-ioblock)
    572510      (file-ioblock-seek file-ioblock curpos)
    573       (file-stream-force-output stream file-ioblock count nil))
     511      (fd-stream-force-output stream file-ioblock count nil))
    574512    (unless (eql newpos (file-octet-filepos file-ioblock))
    575513      (break "Expected newpos to be ~d, fd is at ~d"
    576514             newpos (file-octet-filepos file-ioblock)))
    577515    (setf (file-ioblock-octet-pos file-ioblock) newpos)
    578     (file-stream-advance stream file-ioblock read-p)))
     516    (fd-stream-advance stream file-ioblock read-p)))
    579517
    580518                   
     
    585523      (break "Expected newpos to be ~d, fd is at ~d"
    586524             curpos (file-octet-filepos file-ioblock)))
    587     (let* ((n (file-stream-force-output stream file-ioblock count finish-p)))
     525    (let* ((n (fd-stream-force-output stream file-ioblock count finish-p)))
    588526      (incf (file-ioblock-octet-pos file-ioblock) (or n 0))
    589527      n)))
     
    877815                  (setf (file-ioblock-fileeof ioblock)
    878816                        (ioblock-octets-to-elements ioblock (fd-size fd)))
    879                   (install-line-termination-filters ioblock line-termination in-p out-p)
     817                  (when (and in-p (eq line-termination :inferred))
     818                    (infer-line-termination ioblock))
    880819                  (cond ((eq if-exists :append)
    881820                         (file-position fstream :end))
Note: See TracChangeset for help on using the changeset viewer.