Changeset 5278
- Timestamp: Sep 27, 2006, 7:33:07 PM (18 years ago)
- File: 1 edited
trunk/ccl/level-1/l1-unicode.lisp (modified) (11 diffs)
Legend:
- Unmodified
- Added
- Removed
trunk/ccl/level-1/l1-unicode.lisp
r5262 r5278 89 89 ;; that implements this encoding with swapped byte order. 90 90 (use-byte-order-mark nil) 91 (documentation nil) 91 92 ) 92 93 … … 106 107 ;;; function will have "name" <name> (this is often helpful when debugging.) 107 108 108 (defmacro define-character-encoding (name &rest args &key &allow-other-keys)109 (defmacro define-character-encoding (name doc &rest args &key &allow-other-keys) 109 110 (setq name (intern (string name) "KEYWORD")) 110 111 `(progn 111 112 (setf (get-character-encoding ,name) 112 (make-character-encoding :name ,name ,@args))))113 (make-character-encoding :name ,name :documentation ',doc ,@args)))) 113 114 114 115 (defun encoding-name (encoding) … … 119 120 120 121 (define-character-encoding :iso-8859-1 122 "An 8-bit, fixed-width character encoding in which all character 123 codes map to their Unicode equivalents. Intended to support most 124 characters used in most Western European languages." 121 125 :stream-encode-function 122 126 (nfunction … … 287 291 288 292 (define-character-encoding :iso-8859-2 293 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 294 map to their Unicode equivalents and other codes map to other Unicode 295 character values. Intended to provide most characters found in most 296 languages used in Central/Eastern Europe." 289 297 :stream-encode-function 290 298 (nfunction … … 483 491 484 492 (define-character-encoding :iso-8859-3 493 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 494 map to their Unicode equivalents and other codes map to other Unicode 495 character values. Intended to provide most characters found in most 496 languages used in Southern Europe." 485 497 :stream-encode-function 486 498 (nfunction … … 693 705 694 706 (define-character-encoding :iso-8859-4 707 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 708 map to their Unicode equivalents and other codes map to other Unicode 709 character values. 
Intended to provide most characters found in most 710 languages used in Northern Europe." 695 711 :stream-encode-function 696 712 (nfunction … … 827 843 ;;; would certainly be simpler) if it didn't. 828 844 (define-character-encoding :utf-8 845 "An 8-bit, variable-length character encoding in which characters 846 with CHAR-CODEs in the range #x00-#x7f can be encoded in a single 847 octet; characters with larger code values can be encoded in 2 to 4 848 bytes." 829 849 :max-units-per-char 4 830 850 :stream-encode-function … … 1250 1270 1251 1271 ;;; utf-16, native byte order. 1252 (define-character-encoding 1253 #+big-endian-target :utf-16be #-big-endian-target :utf-16le 1272 (define-character-encoding #+big-endian-target :utf-16be #-big-endian-target :utf-16le 1273 #+big-endian-target 1274 "A 16-bit, variable-length encoding in which characters with 1275 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1276 big-endian word and characters with larger codes can be encoded in a 1277 pair of 16-bit big-endian words. The endianness of the encoded data 1278 is implicit in the encoding; byte-order-mark characters are not 1279 interpreted on input or prepended to output." 1280 #+little-endian-target 1281 "A 16-bit, variable-length encoding in which characters with 1282 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1283 little-endian word and characters with larger codes can be encoded in 1284 a pair of 16-bit little-endian words. The endianness of the encoded 1285 data is implicit in the encoding; byte-order-mark characters are not 1286 interpreted on input or prepended to output." 
1254 1287 :max-units-per-char 2 1255 1288 :code-unit-size 16 … … 1407 1440 1408 1441 ;;; utf-16, reversed byte order 1409 (define-character-encoding 1410 #+big-endian-target :utf-16le #-big-endian-target :utf-16be 1442 (define-character-encoding #+big-endian-target :utf-16le #-big-endian-target :utf-16be 1443 #+little-endian-target 1444 "A 16-bit, variable-length encoding in which characters with 1445 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1446 big-endian word and characters with larger codes can be encoded in a 1447 pair of 16-bit big-endian words. The endianness of the encoded data 1448 is implicit in the encoding; byte-order-mark characters are not 1449 interpreted on input or prepended to output." 1450 #+big-endian-target 1451 "A 16-bit, variable-length encoding in which characters with 1452 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1453 little-endian word and characters with larger codes can be encoded in 1454 a pair of 16-bit little-endian words. The endianness of the encoded 1455 data is implicit in the encoding; byte-order-mark characters are not 1456 interpreted on input or prepended to output." 1411 1457 :max-units-per-char 2 1412 1458 :code-unit-size 16 … … 1576 1622 ;;; there is no BOM. 1577 1623 1578 (define-character-encoding 1579 :utf-16 1624 (define-character-encoding :utf-16 1625 "A 16-bit, variable-length encoding in which characters with 1626 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1627 word and characters with larger codes can be encoded in a 1628 pair of 16-bit words. The endianness of the encoded data is 1629 indicated by the endianness of a byte-order-mark character (#\u+feff) 1630 prepended to the data; in the absence of such a character on input, 1631 the data is assumed to be in big-endian order." 
1580 1632 :max-units-per-char 2 1581 1633 :code-unit-size 16 … … 1787 1839 #+little-endian-target :utf-16be 1788 1840 ) 1841 1842 1843 1844 (defun describe-character-encodings () 1845 (let* ((encodings nil)) 1846 (maphash #'(lambda (name enc) 1847 (when name 1848 (push (cons name (character-encoding-documentation enc)) 1849 encodings))) 1850 *character-encodings*) 1851 (dolist (pair (sort encodings #'string< :key #'car)) 1852 (format t "~&~s~&~a~%~%" (car pair) (cdr pair)))))
Note: See TracChangeset for help on using the changeset viewer.
