Changeset 10016


Ignore:
Timestamp:
Jul 13, 2008, 12:55:18 AM (11 years ago)
Author:
gb
Message:

%FAST-COMPACT, %FAST-UNCOMPACT: try to encode to/decode from utf-8.
Vector should be the same size if string is all ASCII, a little
bigger if it occasionally contains 8-bit non-ASCII, and a lot
smaller if it contains a few non-8-bit characters. New code
seems faster.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/working-0711/ccl/compiler/nx0.lisp

    r9937 r10016  
    16741674         string)))))
    16751675
     1676(defun %fast-compact-simple-string (string start end)
     1677  (declare (simple-string string)
     1678           (fixnum start end))
     1679  (let* ((len (- end start))
     1680         (noctets (utf-8-octets-in-string string start end))
     1681         (vec (make-array noctets :element-type '(unsigned-byte 8))))
     1682    (declare (fixnum len noctets)
     1683             (type (simple-array (unsigned-byte 8) (*)) vec))
     1684    (if (= len noctets)                 ;ASCII.
     1685      (do* ((in start (1+ in))
     1686            (out 0 (1+ out)))
     1687           ((= in end))
     1688        (declare (fixnum in out))
     1689        (setf (aref vec out) (%scharcode string in)))
     1690      (funcall (character-encoding-vector-encode-function
     1691                (get-character-encoding :utf-8))
     1692               string
     1693               vec
     1694               0
     1695               start
     1696               end))
     1697    vec))
     1698
    16761699(defun %fast-compact (string)
    16771700  ;; mb: bootstrap
    1678   (when (typep string '(array (unsigned-byte 8)))
     1701  (when (typep string '(array (unsigned-byte 8) (*)))
    16791702    (return-from %fast-compact string))
    16801703  (when (null string)
    16811704    (return-from %fast-compact nil))
    1682   (let ((vec (make-array (length string) :element-type '(unsigned-byte 8))))
    1683     (loop
    1684       for char across string
    1685       for index upfrom 0
    1686       if (<= 0 (char-code char) 255)
    1687          do (setf (aref vec index) (char-code char))
    1688       else
    1689         do (warn "Can't %fast-compact ~C in ~S." char string)
    1690         and do (setf (aref vec index) (char-code #\?)))
    1691     vec))
     1705  (etypecase string
     1706    (simple-string
     1707     (%fast-compact-simple-string string 0 (uvsize string)))
     1708    (string
     1709     (multiple-value-bind (data offset)
     1710         (array-data-and-offset string)
     1711       (declare (fixnum offset))
     1712       (%fast-compact-simple-string data offset (the fixnum (+ offset (length string))))))))
    16921713
    16931714(defun %fast-uncompact (data)
    1694   (if (or (stringp data) (null data))
    1695     data
    1696     (let ((string (make-array (length data) :element-type 'character)))
    1697       (map-into string #'code-char data)
    1698       string)))
     1715  (etypecase data
     1716    ((simple-array (unsigned-byte 8) (*))
     1717     (let* ((encoding (get-character-encoding :utf-8))
     1718            (noctets (length data))
     1719            (nchars (funcall (character-encoding-length-of-vector-encoding-function encoding)
     1720                             data
     1721                             0
     1722                             noctets))
     1723            (string (make-string nchars)))
     1724       (declare (fixnum noctets nchars)
     1725                (simple-string string))
     1726       (if (= noctets nchars)           ;ASCII
     1727         (dotimes (i nchars)
     1728           (setf (%scharcode string i) (aref data i)))
     1729         (funcall (character-encoding-vector-decode-function encoding)
     1730                  data
     1731                  0
     1732                  noctets
     1733                  string))
     1734       string))
     1735    (null data)
     1736    (string data)))
     1737
    16991738
    17001739(defun record-source-location-on-stream-p (stream)
Note: See TracChangeset for help on using the changeset viewer.