Changeset 5278


Ignore:
Timestamp:
Sep 27, 2006, 7:33:07 PM (18 years ago)
Author:
Gary Byers
Message:

Character-encodings have doc strings.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/ccl/level-1/l1-unicode.lisp

    r5262 r5278  
    8989  ;; that implements this encoding with swapped byte order.
    9090  (use-byte-order-mark nil)
     91  (documentation nil)
    9192  )
    9293
     
    106107;;; function will have "name" <name> (this is often helpful when debugging.)
    107108
    ;;; DEFINE-CHARACTER-ENCODING, as changed between r5262 and r5278.
    ;;; In the added (r5278) lines, DOC is a new required positional
    ;;; parameter that follows NAME.  NAME is converted with STRING and
    ;;; interned in the KEYWORD package when the macro is expanded.  The
    ;;; expansion calls MAKE-CHARACTER-ENCODING with :NAME, then
    ;;; :DOCUMENTATION bound to DOC (spliced in quoted, so DOC is not
    ;;; evaluated), then the remaining ARGS, and stores the result with
    ;;; (SETF GET-CHARACTER-ENCODING) under NAME.
    ;;; The removed (r5262) lines took no DOC argument and passed no
    ;;; :DOCUMENTATION.
    ;;; NOTE(review): with the new lambda list, a call written in the old
    ;;; style would have its first keyword bound to DOC -- confirm that
    ;;; every caller was updated.
    108 (defmacro define-character-encoding (name &rest args &key &allow-other-keys)
     109(defmacro define-character-encoding (name doc &rest args &key &allow-other-keys)
    109110  (setq name (intern (string name) "KEYWORD"))
    110111  `(progn
    111112    (setf (get-character-encoding ,name)
    112      (make-character-encoding :name ,name  ,@args))))
     113     (make-character-encoding :name ,name  :documentation ',doc ,@args))))
    113114
    114115(defun encoding-name (encoding)
     
    119120
    120121(define-character-encoding :iso-8859-1
     122  "An 8-bit, fixed-width character encoding in which all character
     123codes map to their Unicode equivalents. Intended to support most
     124characters used in most Western European languages."
    121125  :stream-encode-function
    122126  (nfunction
     
    287291
    288292(define-character-encoding :iso-8859-2
     293  "An 8-bit, fixed-width character encoding in which codes #x00-#x9f
     294map to their Unicode equivalents and other codes map to other Unicode
     295character values.  Intended to provide most characters found in most
     296languages used in Central/Eastern Europe."
    289297  :stream-encode-function
    290298  (nfunction
     
    483491   
    484492(define-character-encoding :iso-8859-3
     493  "An 8-bit, fixed-width character encoding in which codes #x00-#x9f
     494map to their Unicode equivalents and other codes map to other Unicode
     495character values.  Intended to provide most characters found in most
     496languages used in Southern Europe."
    485497  :stream-encode-function
    486498  (nfunction
     
    693705
    694706(define-character-encoding :iso-8859-4
     707  "An 8-bit, fixed-width character encoding in which codes #x00-#x9f
     708map to their Unicode equivalents and other codes map to other Unicode
     709character values.  Intended to provide most characters found in most
     710languages used in Northern Europe."
    695711  :stream-encode-function
    696712  (nfunction
     
    827843;;; would certainly be simpler) if it didn't.
    828844(define-character-encoding :utf-8
     845    "An 8-bit, variable-length character encoding in which characters
     846with CHAR-CODEs in the range #x00-#x7f can be encoded in a single
     847octet; characters with larger code values can be encoded in 2 to 4
     848bytes."
    829849    :max-units-per-char 4
    830850    :stream-encode-function
     
    12501270
    12511271;;; utf-16, native byte order.
    1252 (define-character-encoding
    1253     #+big-endian-target :utf-16be #-big-endian-target :utf-16le
     1272(define-character-encoding #+big-endian-target :utf-16be #-big-endian-target :utf-16le
     1273    #+big-endian-target
     1274    "A 16-bit, variable-length encoding in which characters with
     1275CHAR-CODEs less than #x10000 can be encoded in a single 16-bit
     1276big-endian word and characters with larger codes can be encoded in a
     1277pair of 16-bit big-endian words.  The endianness of the encoded data
     1278is implicit in the encoding; byte-order-mark characters are not
     1279interpreted on input or prepended to output."
     1280    #+little-endian-target
     1281    "A 16-bit, variable-length encoding in which characters with
     1282CHAR-CODEs less than #x10000 can be encoded in a single 16-bit
     1283little-endian word and characters with larger codes can be encoded in
     1284a pair of 16-bit little-endian words.  The endianness of the encoded
     1285data is implicit in the encoding; byte-order-mark characters are not
     1286interpreted on input or prepended to output."
    12541287    :max-units-per-char 2
    12551288    :code-unit-size 16
     
    14071440
    14081441;;; utf-16, reversed byte order
    1409 (define-character-encoding
    1410     #+big-endian-target :utf-16le #-big-endian-target :utf-16be
     1442(define-character-encoding #+big-endian-target :utf-16le #-big-endian-target :utf-16be
     1443    #+little-endian-target
     1444    "A 16-bit, variable-length encoding in which characters with
     1445CHAR-CODEs less than #x10000 can be encoded in a single 16-bit
     1446big-endian word and characters with larger codes can be encoded in a
     1447pair of 16-bit big-endian words.  The endianness of the encoded data
     1448is implicit in the encoding; byte-order-mark characters are not
     1449interpreted on input or prepended to output."
     1450    #+big-endian-target
     1451    "A 16-bit, variable-length encoding in which characters with
     1452CHAR-CODEs less than #x10000 can be encoded in a single 16-bit
     1453little-endian word and characters with larger codes can be encoded in
     1454a pair of 16-bit little-endian words.  The endianness of the encoded
     1455data is implicit in the encoding; byte-order-mark characters are not
     1456interpreted on input or prepended to output."
    14111457    :max-units-per-char 2
    14121458    :code-unit-size 16
     
    15761622;;; there is no BOM.
    15771623
    1578 (define-character-encoding
    1579     :utf-16
     1624(define-character-encoding :utf-16
     1625    "A 16-bit, variable-length encoding in which characters with
     1626CHAR-CODEs less than #x10000 can be encoded in a single 16-bit
     1627word and characters with larger codes can be encoded in a
     1628pair of 16-bit words.  The endianness of the encoded data is
     1629indicated by the endianness of a byte-order-mark character (#\u+feff)
     1630prepended to the data; in the absence of such a character on input,
     1631the data is assumed to be in big-endian order."   
    15801632    :max-units-per-char 2
    15811633    :code-unit-size 16
     
    17871839    #+little-endian-target :utf-16be
    17881840    )
     1841
     1842
     1843
     ;;; DESCRIBE-CHARACTER-ENCODINGS: print each entry of
     ;;; *CHARACTER-ENCODINGS* to standard output (FORMAT destination T).
     ;;; Walks the table with MAPHASH, skipping any entry whose key is NIL,
     ;;; and collects (name . documentation) pairs, where the documentation
     ;;; is read with CHARACTER-ENCODING-DOCUMENTATION.  The pairs are
     ;;; sorted by name with STRING< and printed one per encoding: the name
     ;;; with ~S at the start of a line, the documentation with ~A on the
     ;;; following line, then a blank line.
     ;;; NOTE(review): a documentation value of NIL would be printed by ~A
     ;;; as the symbol NIL; presumably every registered encoding now has a
     ;;; doc string -- confirm.
     1844(defun describe-character-encodings ()
     1845  (let* ((encodings nil))
     1846    (maphash #'(lambda (name enc)
     1847                 (when name
     1848                   (push (cons name (character-encoding-documentation enc))
     1849                         encodings)))
     1850             *character-encodings*)
     1851    (dolist (pair (sort encodings #'string< :key #'car))
     1852      (format t "~&~s~&~a~%~%" (car pair) (cdr pair)))))
Note: See TracChangeset for help on using the changeset viewer.