Changeset 11625


Ignore:
Timestamp:
Jan 19, 2009, 4:15:45 PM (10 years ago)
Author:
gb
Message:

Add CHARACTER-SIZE-IN-OCTETS-FUNCTION to CHARACTER-ENCODING.

File:
1 edited

Legend:

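Unmodified (line shown with both its old and its new line number)
Added (line shown with its new line number only)
Removed (line shown with its old line number only)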
  • trunk/source/level-1/l1-unicode.lisp

    r11203 r11625  
    4040               thing))))
    4141
     42
     ;;; Size function that ignores its argument C and always returns 1.
     ;;; It is the default value of the CHARACTER-SIZE-IN-OCTETS-FUNCTION
     ;;; slot of the CHARACTER-ENCODING structure.
     43(defun character-encoded-in-single-octet (c)
     44  (declare (ignore c))
     45  1)
    4246
    4347(defstruct character-encoding
     
    110114  ;; Char-codes less than  this value map to themselves on output.
    111115  (encode-literal-char-code-limit 0)
     116  (character-size-in-octets-function 'character-encoded-in-single-octet)
    112117  )
    113118
     
    29142919  :encode-literal-char-code-limit #x80 
    29152920  )
     2921
    29162922
    29172923;;; UTF-8.  Decoding checks for malformed sequences; it might be faster (and
     
    31603166    :encode-literal-char-code-limit #x80   
    31613167    :bom-encoding #(#xef #xbb #xbf)
     3168    :character-size-in-octets-function  (lambda (c)
     3169                                          (let* ((code (char-code c)))
     3170                                            (declare (type (mod #x110000) code))
     3171                                            (if (< code #x80)
     3172                                              1
     3173                                              (if (< code #x800)
     3174                                                2
     3175                                                (if (< code #x10000)
     3176                                                  3
     3177                                                  4)))))
     3178     
    31623179    )
    31633180
     
    32123229
    32133230
    ;;; Returns the sum, over the characters of STRING at indices START
    ;;; (inclusive) up to END (exclusive), of a per-character octet count:
    ;;; 2 for a character whose code is below #x10000, otherwise 4.
    ;;; Returns 0 when END is less than START.
    ;;; STRING is accessed with SCHAR, so it must be a simple string.
    3214 (defun utf-16-octets-in-string (string start end)
    3215   (if (>= end start)
    3216     (do* ((noctets 0)
    3217           (i start (1+ i)))
    3218          ((= i end) noctets)
    3219       (declare (fixnum noctets))
    3220       (let* ((code (char-code (schar string i))))
    3221         (declare (type (mod #x110000) code))
    3222         (incf noctets
    3223               (if (< code #x10000)
    3224                 2
    3225                 4))))
    3226     0))
    3227 
    32283231
    32293232(declaim (inline %big-endian-u8-ref-u16 %little-endian-u8-ref-u16))
     
    32773280  val)
    32783281
     ;;; Returns the octet count for the single character C: 2 when its
     ;;; char-code is below #x10000, otherwise 4.  The char-code is declared
     ;;; to be of type (MOD #X110000).
     3282(defun utf-16-character-size-in-octets (c)
     3283  (let* ((code (char-code c)))
     3284    (declare (type (mod #x110000) code))
     3285    (if (< code #x10000)
     3286      2
     3287      4)))
    32793288
    32803289;;; utf-16, native byte order.
     
    34403449    :encode-literal-char-code-limit #x10000
    34413450    :nul-encoding #(0 0)
     3451    :character-size-in-octets-function 'utf-16-character-size-in-octets
    34423452    )
    34433453
     
    36043614  :encode-literal-char-code-limit #x10000
    36053615  :nul-encoding #(0 0)
     3616  :character-size-in-octets-function 'utf-16-character-size-in-octets
    36063617  )
    36073618
     
    38373848  :bom-encoding #+big-endian-target #(#xfe #xff) #+little-endian-target #(#xff #xfe)
    38383849  :nul-encoding #(0 0)
     3850  :character-size-in-octets-function 'utf-16-character-size-in-octets 
    38393851  )
    38403852
     3853
     ;;; Size function that ignores its argument C and always returns 2.
     3854(defun two-octets-per-character (c)
     3855  (declare (ignore c))
     3856  2)
    38413857
    38423858(defun ucs-2-stream-encode (char write-function stream)
     
    39583974  :encode-literal-char-code-limit #x10000 
    39593975  :nul-encoding #(0 0)
     3976  :character-size-in-octets-function 'two-octets-per-character
    39603977  )
    39613978
     
    40464063  :encode-literal-char-code-limit #x10000
    40474064  :nul-encoding #(0 0)
     4065  :character-size-in-octets-function 'two-octets-per-character
    40484066  )
    40494067
     
    41824200  #+little-endian-target :ucs-2be
    41834201  :nul-encoding #(0 0)
     4202  :character-size-in-octets-function 'two-octets-per-character
    41844203  )
    41854204
     4205
     ;;; Size function that ignores its argument C and always returns 4.
     4206(defun four-octets-per-character (c)
     4207  (declare (ignore c))
     4208  4)
    41864209
    41874210(defun ucs-4-stream-encode (char write-function stream)
     
    43654388  :encode-literal-char-code-limit #x110000
    43664389  :nul-encoding #(0 0 0 0)
     4390  :character-size-in-octets-function 'four-octets-per-character
    43674391  )
    43684392
     
    44634487  :encode-literal-char-code-limit #x110000
    44644488  :nul-encoding #(0 0 0 0) 
     4489  :character-size-in-octets-function 'four-octets-per-character
    44654490  )
    44664491
     
    45964621  :bom-encoding #+big-endian-target #(#x00 #x00 #xfe #xff) #+little-endian-target #(#xff #xfe #x00 #x00)
    45974622  :nul-encoding #(0 0 0 0) 
     4623  :character-size-in-octets-function 'four-octets-per-character
    45984624  )
    45994625
Note: See TracChangeset for help on using the changeset viewer.