Changeset 9576


Ignore:
Timestamp:
May 22, 2008, 8:23:56 AM (11 years ago)
Author:
gb
Message:

Fix several bugs (more likely remain; some were fixed in trunk, some
haven't been.)

Define GET-ENCODED-STRING, which is one of many approaches.

Export GET-ENCODED-STRING.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/working-0711-perf/ccl/level-1/l1-unicode.lisp

    r7794 r9576  
    178178  (declare (ignore vector))
    179179  (if (>= end start)
    180     (values (- end start) (- end start))
     180    (values (- end start) end)
    181181    (values 0 0)))
    182182
     
    268268
    269269(define-character-encoding :us-ascii
    270   "An 7-bit, fixed-width character encoding in which all character
    271 codes map to their Unicode equivalents. "
     270  "A 7-bit, fixed-width character encoding in which all character
     271codes map to their Unicode equivalents."
    272272
    273273  :aliases '(:csASCII :cp637 :IBM637 :us :ISO646-US :ascii :ISO-ir-6)
     
    37053705                  (fixnum p highbits))
    37063706         (cond ((< highbits 0)
    3707                 (setf (%get-unsigned-word pointer idx) code)
     3707                (setf (%get-unsigned-word pointer idx) #+big-endian-target code #+little-endian-target (%swap-u16 code))
    37083708                (incf idx 2))
    37093709               (t
    3710                 (setf (%get-unsigned-word pointer idx) (logior #xd800 (the fixnum (ash highbits -10))))
    3711 
    3712                 (setf (%get-unsigned-word pointer (the fixnum (+ idx 2))) (logior #xdc00 (the fixnum (logand highbits #x3ff))))
    3713                 (incf idx 4)))))))
     3710                (let* ((w1 (logior #xd800 (the fixnum (ash highbits -10))))
     3711                       (w2 (logior #xdc00 (the fixnum (logand highbits #x3ff)))))
     3712                  (declare (type (unsigned-byte 16) w1 w2))
     3713                (setf (%get-unsigned-word pointer idx)
     3714                      #+big-endian-target w1 #+little-endian-target (%swap-u16 w1))
     3715                (setf (%get-unsigned-word pointer (the fixnum (+ idx 2)))
     3716                      #+big-endian-target w2
     3717                      #+little-endian-target (%swap-u16 w2))
     3718                (incf idx 4))))))))
    37143719  :memory-decode-function
    37153720  (nfunction
     
    37673772                      (t #+little-endian-target t)))))
    37683773       (do* ((i start)
    3769              (j (+ 2 i) (+ 2 i))
     3774             (j (+ 2 i) (+ 2 j))
    37703775             (nchars 0))
    37713776            ((> j end)
    3772              (if (= i end) (values nchars i))
    3773              (let* ((code (if swap
    3774                             (%reversed-u8-ref-u16 vector i)
    3775                             (%native-u8-ref-u16 vector i)))
    3776                     (nexti (+ i (if (or (< code #xd800)
    3777                                         (>= code #xdc00))
    3778                                   2
    3779                                   4))))
    3780                (declare (type (unsigned-byte 16) code)
    3781                         (fixnum nexti))
    3782                (if (> nexti end)
    3783                  (return (values nchars i))
    3784                  (setq i nexti nchars (1+ nchars)))))))))
     3777             (if (= i end) (values nchars i)))
     3778         (let* ((code (if swap
     3779                        (%reversed-u8-ref-u16 vector i)
     3780                        (%native-u8-ref-u16 vector i)))
     3781                (nexti (+ i (if (or (< code #xd800)
     3782                                    (>= code #xdc00))
     3783                              2
     3784                              4))))
     3785           (declare (type (unsigned-byte 16) code)
     3786                    (fixnum nexti))
     3787           (if (> nexti end)
     3788             (return (values nchars i))
     3789             (setq i nexti nchars (1+ nchars))))))))
    37853790  :length-of-memory-encoding-function
    37863791  (nfunction
     
    37993804                      (t #+little-endian-target t)))))
    38003805       (do* ((i start)
     3806             (j (+ i 2) (+ i 2))
     3807             (end (+ start noctets))
    38013808             (nchars 0 (1+ nchars)))
    3802             ((>= i noctets)
    3803              (if (= i noctets) nchars))
     3809            ((> j end) (values nchars i))
    38043810         (let* ((code (%get-unsigned-word pointer i)))
    38053811           (declare (type (unsigned-byte 16) code))
     
    42404246
    42414247(defun (setf %little-endian-u8-ref-u32) (val u8-vector idx)
    4242   (declare (type (unsigned-byte 16) val)
     4248  (declare (type (unsigned-byte 32) val)
    42434249           (type (simple-array (unsigned-byte 8) (*)) u8-vector)
    42444250           (fixnum idx))
     
    42514257
    42524258;;; UTF-32/UCS-4, native byte order
    4253 (define-character-encoding #+big-endian-target :utf-32be #-big-endian-target :utf32-le
     4259(define-character-encoding #+big-endian-target :utf-32be #-big-endian-target :utf32le
    42544260  #+big-endian-target
    42554261  "A 32-bit, fixed-length encoding in which all Unicode characters
     
    47184724(defun %count-characters-in-octet-vector (vector start end encoding)
    47194725  (unless (= (typecode vector) target::subtag-u8-vector)
    4720     (report-bad-arg vector '(simple-array (unsgigned-byte 8) (*))))
     4726    (report-bad-arg vector '(simple-array (unsigned-byte 8) (*))))
    47214727  (funcall (character-encoding-length-of-vector-encoding-function encoding)
    47224728           vector
    47234729           start
    4724            (- end start)))
     4730           end))
    47254731                                         
    47264732
     
    47894795        (funcall (character-encoding-memory-encode-function encoding)
    47904796                 data pointer offset (+ data-offset (or start 0)) (+ data-offset (or end (length s))))))))
     4797
     4798(defun get-encoded-string (encoding-name pointer noctets)
     4799  (let* ((encoding (ensure-character-encoding encoding-name)))
     4800    (multiple-value-bind (nchars nused)
     4801        (funcall (character-encoding-length-of-memory-encoding-function encoding)
     4802                 pointer
     4803                 noctets
     4804                 0)
     4805      (let* ((string (make-string nchars)))
     4806        (funcall (character-encoding-memory-decode-function encoding)
     4807                 pointer
     4808                 nused
     4809                 0
     4810                 string)
     4811        string))))
     4812       
     4813
    47914814     
    47924815
Note: See TracChangeset for help on using the changeset viewer.