Changeset 5387
- Timestamp:
- Oct 21, 2006, 3:45:42 PM (18 years ago)
- File:
-
- 1 edited
-
trunk/ccl/level-1/l1-unicode.lisp (modified) (17 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/ccl/level-1/l1-unicode.lisp
r5362 r5387 375 375 )) 376 376 377 (defparameter *unicode-00a0-0180-to-iso 8859-2*377 (defparameter *unicode-00a0-0180-to-iso-8859-2* 378 378 #( 379 379 #xa0 nil nil nil #xa4 nil nil #xa7 ; #xa0-#xa7 … … 408 408 )) 409 409 410 (defparameter *unicode-00c0-00e0-to-iso 8859-2*410 (defparameter *unicode-00c0-00e0-to-iso-8859-2* 411 411 #( 412 412 nil nil nil nil nil nil nil #xb7 ; #xc0-#xc7 … … 429 429 (c2 (cond ((< code #xa0) code) 430 430 ((< code #x180) 431 (svref *unicode-00a0-0180-to-iso 8859-2*431 (svref *unicode-00a0-0180-to-iso-8859-2* 432 432 (the fixnum (- code #xa0)))) 433 433 ((and (>= code #x2c0) (< code #x2e0)) 434 (svref *unicode-00c0-00e0-to-iso 8859-2*434 (svref *unicode-00c0-00e0-to-iso-8859-2* 435 435 (the fixnum (- code #x2c0))))))) 436 436 … … 458 458 (c2 (cond ((< code #xa0) code) 459 459 ((< code #x180) 460 (svref *unicode-00a0-0180-to-iso 8859-2*460 (svref *unicode-00a0-0180-to-iso-8859-2* 461 461 (the fixnum (- code #xa0)))) 462 462 ((and (>= code #x2c0) (< code #x2e0)) 463 (svref *unicode-00c0-00e0-to-iso 8859-2*463 (svref *unicode-00c0-00e0-to-iso-8859-2* 464 464 (the fixnum (- code #x2c0))))))) 465 465 (declare (type (mod #x110000) code)) … … 489 489 (c2 (cond ((< code #xa0) code) 490 490 ((< code #x180) 491 (svref *unicode-00a0-0180-to-iso 8859-2*491 (svref *unicode-00a0-0180-to-iso-8859-2* 492 492 (the fixnum (- code #xa0)))) 493 493 ((and (>= code #x2c0) (< code #x2e0)) 494 (svref *unicode-00c0-00e0-to-iso 8859-2*494 (svref *unicode-00c0-00e0-to-iso-8859-2* 495 495 (the fixnum (- code #x2c0))))))) 496 496 (declare (type (mod #x110000) code)) … … 541 541 )) 542 542 543 (defparameter *unicode-a0-100-to-iso 8859-3*543 (defparameter *unicode-a0-100-to-iso-8859-3* 544 544 #( 545 545 #xa0 nil nil #xa3 #xa4 nil nil #xa7 ; #xa0-#xa7 … … 557 557 )) 558 558 559 (defparameter *unicode-108-180-to-iso 8859-3*559 (defparameter *unicode-108-180-to-iso-8859-3* 560 560 #( 561 561 #xc6 #xe6 #xc5 #xe5 #x00 #x00 #x00 #x00 ; #x108-#x10f … … 576 576 )) 577 577 578 (defparameter *unicode-2d8-2e0-to-iso 8859-3*578 (defparameter *unicode-2d8-2e0-to-iso-8859-3* 579 579 #( 580 580 #xa2 #xff nil nil nil nil nil nil ; #x2d8-#x2df … … 597 597 (c2 (cond ((< code #xa0) code) 598 598 ((< code #x100) 599 (svref *unicode-a0-100-to-iso 8859-3*599 (svref *unicode-a0-100-to-iso-8859-3* 600 600 (the fixnum (- code #xa0)))) 601 601 ((and (>= code #x108) (< code #x180)) 602 (svref *unicode-108-180-to-iso 8859-3*602 (svref *unicode-108-180-to-iso-8859-3* 603 603 (the fixnum (- code #x108)))) 604 604 ((and (>= code #x2d8) (< code #x2e0)) 605 (svref *unicode-2d8-2e0-to-iso 8859-3*605 (svref *unicode-2d8-2e0-to-iso-8859-3* 606 606 (the fixnum (- code #x2d8))))))) 607 607 (declare (type (mod #x110000) code)) … … 629 629 (c2 (cond ((< code #xa0) code) 630 630 ((< code #x100) 631 (svref *unicode-a0-100-to-iso 8859-3*631 (svref *unicode-a0-100-to-iso-8859-3* 632 632 (the fixnum (- code #xa0)))) 633 633 ((and (>= code #x108) (< code #x180)) 634 (svref *unicode-108-180-to-iso 8859-3*634 (svref *unicode-108-180-to-iso-8859-3* 635 635 (the fixnum (- code #x108)))) 636 636 ((and (>= code #x2d8) (< code #x2e0)) 637 (svref *unicode-2d8-2e0-to-iso 8859-3*637 (svref *unicode-2d8-2e0-to-iso-8859-3* 638 638 639 639 (the fixnum (- code #x2d8))))))) … … 664 664 (c2 (cond ((< code #xa0) code) 665 665 ((< code #x100) 666 (svref *unicode-a0-100-to-iso 8859-3*666 (svref *unicode-a0-100-to-iso-8859-3* 667 667 (the fixnum (- code #xa0)))) 668 668 ((and (>= code #x108) (< code #x180)) 669 (svref *unicode-108-180-to-iso 8859-3*669 (svref *unicode-108-180-to-iso-8859-3* 670 670 (the fixnum (- code #x108)))) 671 671 ((and (>= code #x2d8) (< code #x2e0)) 672 (svref *unicode-2d8-2e0-to-iso 8859-3*672 (svref *unicode-2d8-2e0-to-iso-8859-3* 673 673 (the fixnum (- code #x2d8))))))) 674 674 (declare (type (mod #x110000) code)) … … 721 721 722 722 723 (defparameter *unicode-a0-180-to-iso 8859-4*723 (defparameter *unicode-a0-180-to-iso-8859-4* 724 724 #( 725 725 #xa0 nil nil nil #xa4 nil nil #xa7 ; #xa0-#xa7 … … 753 753 )) 754 754 755 (defparameter *unicode-2c0-2e0-to-iso 8859-4*755 (defparameter *unicode-2c0-2e0-to-iso-8859-4* 756 756 #( 757 757 nil nil nil nil nil nil nil #xb7 ; #x2c0-#x2c7 … … 777 777 (c2 (cond ((< code #xa0) code) 778 778 ((< code #x180) 779 (svref *unicode-a0-180-to-iso 8859-4*779 (svref *unicode-a0-180-to-iso-8859-4* 780 780 (the fixnum (- code #xa0)))) 781 781 ((and (>= code #x2d8) (< code #x2e0)) 782 (svref *unicode-2c0-2e0-to-iso 8859-4*782 (svref *unicode-2c0-2e0-to-iso-8859-4* 783 783 (the fixnum (- code #x2c0))))))) 784 784 … … 807 807 (c2 (cond ((< code #xa0) code) 808 808 ((< code #x180) 809 (svref *unicode-a0-180-to-iso 8859-4*809 (svref *unicode-a0-180-to-iso-8859-4* 810 810 (the fixnum (- code #xa0)))) 811 811 ((and (>= code #x2d8) (< code #x2e0)) 812 (svref *unicode-2c0-2e0-to-iso 8859-4*812 (svref *unicode-2c0-2e0-to-iso-8859-4* 813 813 (the fixnum (- code #x2c0))))))) 814 814 (declare (type (mod #x110000) code)) … … 838 838 (c2 (cond ((< code #xa0) code) 839 839 ((< code #x180) 840 (svref *unicode-a0-180-to-iso 8859-4*840 (svref *unicode-a0-180-to-iso-8859-4* 841 841 (the fixnum (- code #xa0)))) 842 842 ((and (>= code #x2d8) (< code #x2e0)) 843 (svref *unicode-2c0-2e0-to-iso 8859-4*843 (svref *unicode-2c0-2e0-to-iso-8859-4* 844 844 (the fixnum (- code #x2c0))))))) 845 845 (declare (type (mod #x110000) code)) … … 867 867 :literal-char-code-limit #xa0 868 868 ) 869 870 (defparameter *iso-8859-5-to-unicode* 871 #( 872 ;; #xa0 873 #\u+00a0 #\u+0401 #\u+0402 #\u+0403 #\u+0404 #\u+0405 #\u+0406 #\u+0407 874 #\u+0408 #\u+0409 #\u+040a #\u+040b #\u+040c #\u+00ad #\u+040e #\u+040f 875 ;; #xb0 876 #\u+0410 #\u+0411 #\u+0412 #\u+0413 #\u+0414 #\u+0415 #\u+0416 #\u+0417 877 #\u+0418 #\u+0419 #\u+041a #\u+041b #\u+041c #\u+041d #\u+041e #\u+041f 878 ;; #xc0 879 #\u+0420 #\u+0421 #\u+0422 #\u+0423 #\u+0424 #\u+0425 #\u+0426 #\u+0427 880 #\u+0428 #\u+0429 #\u+042a #\u+042b #\u+042c #\u+042d #\u+042e #\u+042f 881 ;; #xd0 882 #\u+0430 #\u+0431 #\u+0432 #\u+0433 #\u+0434 #\u+0435 #\u+0436 #\u+0437 883 #\u+0438 #\u+0439 #\u+043a #\u+043b #\u+043c #\u+043d #\u+043e #\u+043f 884 ;; #xe0 885 #\u+0440 #\u+0441 #\u+0442 #\u+0443 #\u+0444 #\u+0445 #\u+0446 #\u+0447 886 #\u+0448 #\u+0449 #\u+044a #\u+044b #\u+044c #\u+044d #\u+044e #\u+044f 887 ;; #xf0 888 #\u+2116 #\u+0451 #\u+0452 #\u+0453 #\u+0454 #\u+0455 #\u+0456 #\u+0457 889 #\u+0458 #\u+0459 #\u+045a #\u+045b #\u+045c #\u+00a7 #\u+045e #\u+045f 890 )) 891 892 893 (defparameter *unicode-a0-b0-to-iso-8859-5* 894 #( 895 #xa0 nil nil nil nil nil nil #xfd ; #xa0-#xa7 896 nil nil nil nil nil #xad nil nil ; #xa8-#xaf 897 )) 898 899 (defparameter *unicode-400-460-to-iso-8859-5* 900 #( 901 nil #xa1 #xa2 #xa3 #xa4 #xa5 #xa6 #xa7 ; #x400-#x407 902 #xa8 #xa9 #xaa #xab #xac nil #xae #xaf ; #x408-#x40f 903 #xb0 #xb1 #xb2 #xb3 #xb4 #xb5 #xb6 #xb7 ; #x410-#x417 904 #xb8 #xb9 #xba #xbb #xbc #xbd #xbe #xbf ; #x418-#x41f 905 #xc0 #xc1 #xc2 #xc3 #xc4 #xc5 #xc6 #xc7 ; #x420-#x427 906 #xc8 #xc9 #xca #xcb #xcc #xcd #xce #xcf ; #x428-#x42f 907 #xd0 #xd1 #xd2 #xd3 #xd4 #xd5 #xd6 #xd7 ; #x430-#x437 908 #xd8 #xd9 #xda #xdb #xdc #xdd #xde #xdf ; #x438-#x43f 909 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #x440-#x447 910 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #x448-#x44f 911 nil #xf1 #xf2 #xf3 #xf4 #xf5 #xf6 #xf7 ; #x450-#x457 912 #xf8 #xf9 #xfa #xfb #xfc nil #xfe #xff ; #x458-#x45f 913 )) 914 915 916 (define-character-encoding :iso-8859-5 917 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 918 map to their Unicode equivalents and other codes map to other Unicode 919 character values. Intended to provide most characters found in the 920 Cyrillic alphabet." 921 922 :aliases '(:iso_8859-5 :cyrillic :csISOLatinCyrillic :iso-ir-144) 923 :stream-encode-function 924 (nfunction 925 iso-8859-5-stream-encode 926 (lambda (char write-function stream) 927 (let* ((code (char-code char)) 928 (c2 (cond ((< code #xa0) code) 929 ((< code #xb0) 930 (svref *unicode-a0-b0-to-iso-8859-5* 931 (the fixnum (- code #xa0)))) 932 ((and (>= code #x400) (< code #x460)) 933 (svref *unicode-400-460-to-iso-8859-5* 934 (the fixnum (- code #x400))))))) 935 936 (declare (type (mod #x110000) code)) 937 (funcall write-function stream (or c2 (char-code #\Sub))) 938 1))) 939 :stream-decode-function 940 (nfunction 941 iso-8859-5-stream-decode 942 (lambda (1st-unit next-unit-function stream) 943 (declare (ignore next-unit-function stream) 944 (type (unsigned-byte 8) 1st-unit)) 945 (if (< 1st-unit #xa0) 946 (code-char 1st-unit) 947 (svref *iso-8859-5-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 948 :vector-encode-function 949 (nfunction 950 iso-8859-5-vector-encode 951 (lambda (string vector idx start end) 952 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 953 (fixnum idx)) 954 (do* ((i start (1+ i))) 955 ((>= i end) idx) 956 (let* ((char (schar string i)) 957 (code (char-code char)) 958 (c2 (cond ((< code #xa0) code) 959 ((< code #xb0) 960 (svref *unicode-a0-b0-to-iso-8859-5* 961 (the fixnum (- code #xa0)))) 962 ((and (>= code #x400) (< code #x460)) 963 (svref *unicode-400-460-to-iso-8859-5* 964 (the fixnum (- code #x400))))))) 965 (declare (type (mod #x110000) code)) 966 (setf (aref vector idx) (or c2 (char-code #\Sub))) 967 (incf idx))))) 968 :vector-decode-function 969 (nfunction 970 iso-8859-5-vector-decode 971 (lambda (vector idx noctets string) 972 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 973 (do* ((i 0 (1+ i)) 974 (index idx (1+ index))) 975 ((>= i noctets) index) 976 (let* ((1st-unit (aref vector index))) 977 (declare (type (unsigned-byte 8) 1st-unit)) 978 (setf (schar string i) 979 (if (< 1st-unit #xa0) 980 (code-char 1st-unit) 981 (svref *iso-8859-5-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 982 :memory-encode-function 983 (nfunction 984 iso-8859-5-memory-encode 985 (lambda (string pointer idx start end) 986 (do* ((i start (1+ i))) 987 ((>= i end) idx) 988 (let* ((code (char-code (schar string i))) 989 (c2 (cond ((< code #xa0) code) 990 ((< code #xb0) 991 (svref *unicode-a0-b0-to-iso-8859-5* 992 (the fixnum (- code #xa0)))) 993 ((and (>= code #x400) (< code #x460)) 994 (svref *unicode-400-460-to-iso-8859-5* 995 (the fixnum (- code #x400))))))) 996 (declare (type (mod #x110000) code)) 997 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 998 (incf idx))))) 999 :memory-decode-function 1000 (nfunction 1001 iso-8859-5-memory-decode 1002 (lambda (pointer noctets idx string) 1003 (do* ((i 0 (1+ i)) 1004 (index idx (1+ index))) 1005 ((>= i noctets) index) 1006 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1007 (declare (type (unsigned-byte 8) 1st-unit)) 1008 (setf (schar string i) 1009 (if (< 1st-unit #xa0) 1010 (code-char 1st-unit) 1011 (svref *iso-8859-5-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1012 :octets-in-string-function 1013 #'8-bit-fixed-width-octets-in-string 1014 :length-of-vector-encoding-function 1015 #'8-bit-fixed-width-length-of-vector-encoding 1016 :length-of-memory-encoding-function 1017 #'8-bit-fixed-width-length-of-memory-encoding 1018 :literal-char-code-limit #xa0 1019 ) 1020 1021 (defparameter *iso-8859-6-to-unicode* 1022 #( 1023 ;; #xa0 1024 #\u+00a0 #\u+fffd #\u+fffd #\u+fffd #\u+00a4 #\u+fffd #\u+fffd #\u+fffd 1025 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+060c #\u+00ad #\u+fffd #\u+fffd 1026 ;; #xb0 1027 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1028 #\u+fffd #\u+fffd #\u+fffd #\u+061b #\u+fffd #\u+fffd #\u+fffd #\u+061f 1029 ;; #xc0 1030 #\u+fffd #\u+0621 #\u+0622 #\u+0623 #\u+0624 #\u+0625 #\u+0626 #\u+0627 1031 #\u+0628 #\u+0629 #\u+062a #\u+062b #\u+062c #\u+062d #\u+062e #\u+062f 1032 ;; #xd0 1033 #\u+0630 #\u+0631 #\u+0632 #\u+0633 #\u+0634 #\u+0635 #\u+0636 #\u+0637 1034 #\u+0638 #\u+0639 #\u+063a #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1035 ;; #xe0 1036 #\u+0640 #\u+0641 #\u+0642 #\u+0643 #\u+0644 #\u+0645 #\u+0646 #\u+0647 1037 #\u+0648 #\u+0649 #\u+064a #\u+064b #\u+064c #\u+064d #\u+064e #\u+064f 1038 ;; #xf0 1039 #\u+0650 #\u+0651 #\u+0652 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1040 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1041 )) 1042 1043 (defparameter *unicode-a0-b0-to-iso-8859-6* 1044 #( 1045 0xa0 nil nil nil 0xa4 nil nil nil ; #xa0-#xa7 1046 nil nil nil nil nil #xad nil nil ; #xa8-#xaf 1047 )) 1048 1049 1050 (defparameter *unicode-608-658-to-iso-8859-6* 1051 #( 1052 nil nil nil nil #xac nil nil nil ; #x608-#x60f 1053 nil nil nil nil nil nil nil nil ; #x610-#x617 1054 nil nil nil #xbb nil nil nil #xbf ; #x618-#x61f 1055 nil #xc1 #xc2 #xc3 #xc4 #xc5 #xc6 #xc7 ; #x620-#x627 1056 #xc8 #xc9 #xca #xcb #xcc #xcd #xce #xcf ; #x628-#x62f 1057 #xd0 #xd1 #xd2 #xd3 #xd4 #xd5 #xd6 #xd7 ; #x630-#x637 1058 #xd8 #xd9 #xda nil nil nil nil nil ; #x638-#x63f 1059 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #x640-#x647 1060 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #x648-#x64f 1061 #xf0 #xf1 #xf2 nil nil nil nil nil ; #x650-#x657 1062 )) 1063 1064 (define-character-encoding :iso-8859-6 1065 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 1066 map to their Unicode equivalents and other codes map to other Unicode 1067 character values. Intended to provide most characters found in the 1068 Arabic alphabet." 1069 1070 :aliases '(:iso_8859-6 :arabic :csISOLatinArabic :iso-ir-127) 1071 :stream-encode-function 1072 (nfunction 1073 iso-8859-6-stream-encode 1074 (lambda (char write-function stream) 1075 (let* ((code (char-code char)) 1076 (c2 (cond ((< code #xa0) code) 1077 ((< code #xb0) 1078 (svref *unicode-a0-b0-to-iso-8859-6* 1079 (the fixnum (- code #xa0)))) 1080 ((and (>= code #x608) (< code #x658)) 1081 (svref *unicode-608-658-to-iso-8859-6* 1082 (the fixnum (- code #x608))))))) 1083 1084 (declare (type (mod #x110000) code)) 1085 (funcall write-function stream (or c2 (char-code #\Sub))) 1086 1))) 1087 :stream-decode-function 1088 (nfunction 1089 iso-8859-6-stream-decode 1090 (lambda (1st-unit next-unit-function stream) 1091 (declare (ignore next-unit-function stream) 1092 (type (unsigned-byte 8) 1st-unit)) 1093 (if (< 1st-unit #xa0) 1094 (code-char 1st-unit) 1095 (svref *iso-8859-6-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 1096 :vector-encode-function 1097 (nfunction 1098 iso-8859-6-vector-encode 1099 (lambda (string vector idx start end) 1100 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1101 (fixnum idx)) 1102 (do* ((i start (1+ i))) 1103 ((>= i end) idx) 1104 (let* ((char (schar string i)) 1105 (code (char-code char)) 1106 (c2 (cond ((< code #xa0) code) 1107 ((< code #xb0) 1108 (svref *unicode-a0-b0-to-iso-8859-6* 1109 (the fixnum (- code #xa0)))) 1110 ((and (>= code #x608) (< code #x658)) 1111 (svref *unicode-608-658-to-iso-8859-6* 1112 (the fixnum (- code #x608))))))) 1113 (declare (type (mod #x110000) code)) 1114 (setf (aref vector idx) (or c2 (char-code #\Sub))) 1115 (incf idx))))) 1116 :vector-decode-function 1117 (nfunction 1118 iso-8859-6-vector-decode 1119 (lambda (vector idx noctets string) 1120 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1121 (do* ((i 0 (1+ i)) 1122 (index idx (1+ index))) 1123 ((>= i noctets) index) 1124 (let* ((1st-unit (aref vector index))) 1125 (declare (type (unsigned-byte 8) 1st-unit)) 1126 (setf (schar string i) 1127 (if (< 1st-unit #xa0) 1128 (code-char 1st-unit) 1129 (svref *iso-8859-6-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1130 :memory-encode-function 1131 (nfunction 1132 iso-8859-6-memory-encode 1133 (lambda (string pointer idx start end) 1134 (do* ((i start (1+ i))) 1135 ((>= i end) idx) 1136 (let* ((code (char-code (schar string i))) 1137 (c2 (cond ((< code #xa0) code) 1138 ((< code #xb0) 1139 (svref *unicode-a0-b0-to-iso-8859-6* 1140 (the fixnum (- code #xa0)))) 1141 ((and (>= code #x608) (< code #x658)) 1142 (svref *unicode-608-658-to-iso-8859-6* 1143 (the fixnum (- code #x608))))))) 1144 (declare (type (mod #x110000) code)) 1145 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 1146 (incf idx))))) 1147 :memory-decode-function 1148 (nfunction 1149 iso-8859-6-memory-decode 1150 (lambda (pointer noctets idx string) 1151 (do* ((i 0 (1+ i)) 1152 (index idx (1+ index))) 1153 ((>= i noctets) index) 1154 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1155 (declare (type (unsigned-byte 8) 1st-unit)) 1156 (setf (schar string i) 1157 (if (< 1st-unit #xa0) 1158 (code-char 1st-unit) 1159 (svref *iso-8859-6-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1160 :octets-in-string-function 1161 #'8-bit-fixed-width-octets-in-string 1162 :length-of-vector-encoding-function 1163 #'8-bit-fixed-width-length-of-vector-encoding 1164 :length-of-memory-encoding-function 1165 #'8-bit-fixed-width-length-of-memory-encoding 1166 :literal-char-code-limit #xa0 1167 ) 1168 1169 (defparameter *iso-8859-7-to-unicode* 1170 #( 1171 ;; #xa0 1172 #\u+00a0 #\u+2018 #\u+2019 #\u+00a3 #\u+20ac #\u+20af #\u+00a6 #\u+00a7 1173 #\u+00a8 #\u+00a9 #\u+037a #\u+00ab #\u+00ac #\u+00ad #\u+fffd #\u+2015 1174 ;; #xb0 1175 #\u+00b0 #\u+00b1 #\u+00b2 #\u+00b3 #\u+0384 #\u+0385 #\u+0386 #\u+00b7 1176 #\u+0388 #\u+0389 #\u+038a #\u+00bb #\u+038c #\u+00bd #\u+038e #\u+038f 1177 ;; #xc0 1178 #\u+0390 #\u+0391 #\u+0392 #\u+0393 #\u+0394 #\u+0395 #\u+0396 #\u+0397 1179 #\u+0398 #\u+0399 #\u+039a #\u+039b #\u+039c #\u+039d #\u+039e #\u+039f 1180 ;; #xd0 1181 #\u+03a0 #\u+03a1 #\u+fffd #\u+03a3 #\u+03a4 #\u+03a5 #\u+03a6 #\u+03a7 1182 #\u+03a8 #\u+03a9 #\u+03aa #\u+03ab #\u+03ac #\u+03ad #\u+03ae #\u+03af 1183 ;; #xe0 1184 #\u+03b0 #\u+03b1 #\u+03b2 #\u+03b3 #\u+03b4 #\u+03b5 #\u+03b6 #\u+03b7 1185 #\u+03b8 #\u+03b9 #\u+03ba #\u+03bb #\u+03bc #\u+03bd #\u+03be #\u+03bf 1186 ;; #xf0 1187 #\u+03c0 #\u+03c1 #\u+03c2 #\u+03c3 #\u+03c4 #\u+03c5 #\u+03c6 #\u+03c7 1188 #\u+03c8 #\u+03c9 #\u+03ca #\u+03cb #\u+03cc #\u+03cd #\u+03ce #\u+fffd 1189 )) 1190 1191 (defparameter *unicode-a0-c0-to-iso-8859-7* 1192 #( 1193 #xa0 nil nil #xa3 nil nil #xa6 #xa7 ; #xa0-#xa7 1194 #xa8 #xa9 nil #xab #xac #xad nil nil ; #xa8-#xaf 1195 #xb0 #xb1 #xb2 #xb3 nil nil nil #xb7 ; #xb0-#xb7 1196 nil nil nil #xbb nil #xbd nil nil ; #xb8-#xbf 1197 )) 1198 1199 (defparameter *unicode-378-3d0-to-iso-8859-7* 1200 #( 1201 nil nil #xaa nil nil nil nil nil ; #x378-#x37f 1202 nil nil nil nil #xb4 #xb5 #xb6 nil ; #x380-#x387 1203 #xb8 #xb9 #xba nil #xbc nil #xbe #xbf ; #x388-#x38f 1204 #xc0 #xc1 #xc2 #xc3 #xc4 #xc5 #xc6 #xc7 ; #x390-#x397 1205 #xc8 #xc9 #xca #xcb #xcc #xcd #xce #xcf ; #x398-#x39f 1206 #xd0 #xd1 nil #xd3 #xd4 #xd5 #xd6 #xd7 ; #x3a0-#x3a7 1207 #xd8 #xd9 #xda #xdb #xdc #xdd #xde #xdf ; #x3a8-#x3af 1208 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #x3b0-#x3b7 1209 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #x3b8-#x3bf 1210 #xf0 #xf1 #xf2 #xf3 #xf4 #xf5 #xf6 #xf7 ; #x3c0-#x3c7 1211 #xf8 #xf9 #xfa #xfb #xfc #xfd #xfe nil ; #x3c8-#x3cf 1212 )) 1213 1214 (defparameter *unicode-2010-2020-to-iso-8859-7* 1215 #( 1216 nil nil nil nil nil #xaf nil nil ; #x2010-#x2017 1217 #xa1 #xa2 nil nil nil nil nil nil ; #x2018-#x201f 1218 )) 1219 1220 (defparameter *unicode-20ac-20b0-to-iso-8859-7* 1221 #( 1222 #xa4 nil nil #xa5 1223 )) 1224 1225 (define-character-encoding :iso-8859-7 1226 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 1227 map to their Unicode equivalents and other codes map to other Unicode 1228 character values. Intended to provide most characters found in the 1229 Greek alphabet." 1230 1231 :aliases '(:iso_8859-7 :greek :greek8 :csISOLatinGreek :iso-ir-126 :ELOT_928 :ecma-118) 1232 :stream-encode-function 1233 (nfunction 1234 iso-8859-7-stream-encode 1235 (lambda (char write-function stream) 1236 (let* ((code (char-code char)) 1237 (c2 (cond ((< code #xa0) code) 1238 ((< code #xc0) 1239 (svref *unicode-a0-c0-to-iso-8859-7* 1240 (the fixnum (- code #xa0)))) 1241 ((and (>= code #x378) (< code #x3d0)) 1242 (svref *unicode-378-3d0-to-iso-8859-7* 1243 (the fixnum (- code #x378)))) 1244 ((and (>= code #x2010) (< code #x2020)) 1245 (svref *unicode-2010-2020-to-iso-8859-7* 1246 (the fixnum (- code #x2010)))) 1247 ((and (>= code #x20ac) (< code #x20b0)) 1248 (svref *unicode-20ac-20b0-to-iso-8859-7* 1249 (the fixnum (- code #x20ac))))))) 1250 1251 (declare (type (mod #x110000) code)) 1252 (funcall write-function stream (or c2 (char-code #\Sub))) 1253 1))) 1254 :stream-decode-function 1255 (nfunction 1256 iso-8859-7-stream-decode 1257 (lambda (1st-unit next-unit-function stream) 1258 (declare (ignore next-unit-function stream) 1259 (type (unsigned-byte 8) 1st-unit)) 1260 (if (< 1st-unit #xa0) 1261 (code-char 1st-unit) 1262 (svref *iso-8859-7-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 1263 :vector-encode-function 1264 (nfunction 1265 iso-8859-7-vector-encode 1266 (lambda (string vector idx start end) 1267 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1268 (fixnum idx)) 1269 (do* ((i start (1+ i))) 1270 ((>= i end) idx) 1271 (let* ((char (schar string i)) 1272 (code (char-code char)) 1273 (c2 (cond ((< code #xa0) code) 1274 ((< code #xc0) 1275 (svref *unicode-a0-c0-to-iso-8859-7* 1276 (the fixnum (- code #xa0)))) 1277 ((and (>= code #x378) (< code #x3d0)) 1278 (svref *unicode-378-3d0-to-iso-8859-7* 1279 (the fixnum (- code #x378)))) 1280 ((and (>= code #x2010) (< code #x2020)) 1281 (svref *unicode-2010-2020-to-iso-8859-7* 1282 (the fixnum (- code #x2010)))) 1283 ((and (>= code #x20ac) (< code #x20b0)) 1284 (svref *unicode-20ac-20b0-to-iso-8859-7* 1285 (the fixnum (- code #x20ac))))))) 1286 (declare (type (mod #x110000) code)) 1287 (setf (aref vector idx) (or c2 (char-code #\Sub))) 1288 (incf idx))))) 1289 :vector-decode-function 1290 (nfunction 1291 iso-8859-7-vector-decode 1292 (lambda (vector idx noctets string) 1293 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1294 (do* ((i 0 (1+ i)) 1295 (index idx (1+ index))) 1296 ((>= i noctets) index) 1297 (let* ((1st-unit (aref vector index))) 1298 (declare (type (unsigned-byte 8) 1st-unit)) 1299 (setf (schar string i) 1300 (if (< 1st-unit #xa0) 1301 (code-char 1st-unit) 1302 (svref *iso-8859-7-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1303 :memory-encode-function 1304 (nfunction 1305 iso-8859-7-memory-encode 1306 (lambda (string pointer idx start end) 1307 (do* ((i start (1+ i))) 1308 ((>= i end) idx) 1309 (let* ((code (char-code (schar string i))) 1310 (c2 (cond ((< code #xa0) code) 1311 ((< code #xc0) 1312 (svref *unicode-a0-c0-to-iso-8859-7* 1313 (the fixnum (- code #xa0)))) 1314 ((and (>= code #x378) (< code #x3d0)) 1315 (svref *unicode-378-3d0-to-iso-8859-7* 1316 (the fixnum (- code #x378)))) 1317 ((and (>= code #x2010) (< code #x2020)) 1318 (svref *unicode-2010-2020-to-iso-8859-7* 1319 (the fixnum (- code #x2010)))) 1320 ((and (>= code #x20ac) (< code #x20b0)) 1321 (svref *unicode-20ac-20b0-to-iso-8859-7* 1322 (the fixnum (- code #x20ac))))))) 1323 (declare (type (mod #x110000) code)) 1324 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 1325 (incf idx))))) 1326 :memory-decode-function 1327 (nfunction 1328 iso-8859-7-memory-decode 1329 (lambda (pointer noctets idx string) 1330 (do* ((i 0 (1+ i)) 1331 (index idx (1+ index))) 1332 ((>= i noctets) index) 1333 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1334 (declare (type (unsigned-byte 8) 1st-unit)) 1335 (setf (schar string i) 1336 (if (< 1st-unit #xa0) 1337 (code-char 1st-unit) 1338 (svref *iso-8859-7-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1339 :octets-in-string-function 1340 #'8-bit-fixed-width-octets-in-string 1341 :length-of-vector-encoding-function 1342 #'8-bit-fixed-width-length-of-vector-encoding 1343 :length-of-memory-encoding-function 1344 #'8-bit-fixed-width-length-of-memory-encoding 1345 :literal-char-code-limit #xa0 1346 ) 1347 1348 (defparameter *iso-8859-8-to-unicode* 1349 #( 1350 ;; #xa0 1351 #\u+00a0 #\u+fffd #\u+00a2 #\u+00a3 #\u+00a4 #\u+00a5 #\u+00a6 #\u+00a7 1352 #\u+00a8 #\u+00a9 #\u+00d7 #\u+00ab #\u+00ac #\u+00ad #\u+00ae #\u+00af 1353 ;; #xb0 1354 #\u+00b0 #\u+00b1 #\u+00b2 #\u+00b3 #\u+00b4 #\u+00b5 #\u+00b6 #\u+00b7 1355 #\u+00b8 #\u+00b9 #\u+00f7 #\u+00bb #\u+00bc #\u+00bd #\u+00be #\u+fffd 1356 ;; #xc0 1357 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1358 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1359 ;; #xd0 1360 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd 1361 #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+fffd #\u+2017 1362 ;; #xe0 1363 #\u+05d0 #\u+05d1 #\u+05d2 #\u+05d3 #\u+05d4 #\u+05d5 #\u+05d6 #\u+05d7 1364 #\u+05d8 #\u+05d9 #\u+05da #\u+05db #\u+05dc #\u+05dd #\u+05de #\u+05df 1365 ;; #xf0 1366 #\u+05e0 #\u+05e1 #\u+05e2 #\u+05e3 #\u+05e4 #\u+05e5 #\u+05e6 #\u+05e7 1367 #\u+05e8 #\u+05e9 #\u+05ea #\u+fffd #\u+fffd #\u+200e #\u+200f #\u+fffd 1368 )) 1369 1370 (defparameter *unicode-a0-f8-to-iso-8859-8* 1371 #( 1372 #xa0 nil #xa2 #xa3 #xa4 #xa5 #xa6 #xa7 ; #xa0-#xa7 1373 #xa8 #xa9 nil #xab #xac #xad #xae #xaf ; #xa8-#xaf 1374 #xb0 #xb1 #xb2 #xb3 #xb4 #xb5 #xb6 #xb7 ; #xb0-#xb7 1375 #xb8 #xb9 nil #xbb #xbc #xbd #xbe nil ; #xb8-#xbf 1376 nil nil nil nil nil nil nil nil ; #xc0-#xc7 1377 nil nil nil nil nil nil nil nil ; #xc8-#xcf 1378 nil nil nil nil nil nil nil #xaa ; #xd0-#xd7 1379 nil nil nil nil nil nil nil nil ; #xd8-#xdf 1380 nil nil nil nil nil nil nil nil ; #xe0-#xe7 1381 nil nil nil nil nil nil nil nil ; #xe8-#xef 1382 nil nil nil nil nil nil nil #xba ; #xf0-#xf7 1383 )) 1384 1385 (defparameter *unicode-5d0-5f0-to-iso-8859-8* 1386 #( 1387 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #x5d0-#x5d7 1388 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #x5d8-#x5df 1389 #xf0 #xf1 #xf2 #xf3 #xf4 #xf5 #xf6 #xf7 ; #x5e0-#x5e7 1390 #xf8 #xf9 #xfa nil nil nil nil nil ; #x5e8-#x5ef 1391 )) 1392 1393 (defparameter *unicode-2008-2018-to-iso-8859-8* 1394 #( 1395 nil nil nil nil nil nil #xfd #xfe ; #x2008-#x200f 1396 nil nil nil nil nil nil nil #xdf ; #x2010-#x2017 1397 )) 1398 1399 (define-character-encoding :iso-8859-8 1400 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 1401 map to their Unicode equivalents and other codes map to other Unicode 1402 character values. Intended to provide most characters found in the 1403 Hebrew alphabet." 1404 1405 :aliases '(:iso_8859-8 :hebrew :csISOLatinHebrew :iso-ir-138) 1406 :stream-encode-function 1407 (nfunction 1408 iso-8859-8-stream-encode 1409 (lambda (char write-function stream) 1410 (let* ((code (char-code char)) 1411 (c2 (cond ((< code #xa0) code) 1412 ((< code #xf8) 1413 (svref *unicode-a0-f8-to-iso-8859-8* 1414 (the fixnum (- code #xa0)))) 1415 ((and (>= code #x5d0) (< code #x5f0)) 1416 (svref *unicode-5d0-5f0-to-iso-8859-8* 1417 (the fixnum (- code #x5d0)))) 1418 ((and (>= code #x2008) (< code #x2018)) 1419 (svref *unicode-2008-2018-to-iso-8859-8* 1420 (the fixnum (- code #x2008))))))) 1421 1422 (declare (type (mod #x110000) code)) 1423 (funcall write-function stream (or c2 (char-code #\Sub))) 1424 1))) 1425 :stream-decode-function 1426 (nfunction 1427 iso-8859-8-stream-decode 1428 (lambda (1st-unit next-unit-function stream) 1429 (declare (ignore next-unit-function stream) 1430 (type (unsigned-byte 8) 1st-unit)) 1431 (if (< 1st-unit #xa0) 1432 (code-char 1st-unit) 1433 (svref *iso-8859-8-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 1434 :vector-encode-function 1435 (nfunction 1436 iso-8859-8-vector-encode 1437 (lambda (string vector idx start end) 1438 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1439 (fixnum idx)) 1440 (do* ((i start (1+ i))) 1441 ((>= i end) idx) 1442 (let* ((char (schar string i)) 1443 (code (char-code char)) 1444 (c2 (cond ((< code #xa0) code) 1445 ((< code #xf8) 1446 (svref *unicode-a0-f8-to-iso-8859-8* 1447 (the fixnum (- code #xa0)))) 1448 ((and (>= code #x5d0) (< code #x5f0)) 1449 (svref *unicode-5d0-5f0-to-iso-8859-8* 1450 (the fixnum (- code #x5d0)))) 1451 ((and (>= code #x2008) (< code #x2018)) 1452 (svref *unicode-2008-2018-to-iso-8859-8* 1453 (the fixnum (- code #x2008))))))) 1454 (declare (type (mod #x110000) code)) 1455 (setf (aref vector idx) (or c2 (char-code #\Sub))) 1456 (incf idx))))) 1457 :vector-decode-function 1458 (nfunction 1459 iso-8859-8-vector-decode 1460 (lambda (vector idx noctets string) 1461 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1462 (do* ((i 0 (1+ i)) 1463 (index idx (1+ index))) 1464 ((>= i noctets) index) 1465 (let* ((1st-unit (aref vector index))) 1466 (declare (type (unsigned-byte 8) 1st-unit)) 1467 (setf (schar string i) 1468 (if (< 1st-unit #xa0) 1469 (code-char 1st-unit) 1470 (svref *iso-8859-8-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1471 :memory-encode-function 1472 (nfunction 1473 iso-8859-8-memory-encode 1474 (lambda (string pointer idx start end) 1475 (do* ((i start (1+ i))) 1476 ((>= i end) idx) 1477 (let* ((code (char-code (schar string i))) 1478 (c2 (cond ((< code #xa0) code) 1479 ((< code #xf8) 1480 (svref *unicode-a0-f8-to-iso-8859-8* 1481 (the fixnum (- code #xa0)))) 1482 ((and (>= code #x5d0) (< code #x5f0)) 1483 (svref *unicode-5d0-5f0-to-iso-8859-8* 1484 (the fixnum (- code #x5d0)))) 1485 ((and (>= code #x2008) (< code #x2018)) 1486 (svref *unicode-2008-2018-to-iso-8859-8* 1487 (the fixnum (- code #x2008))))))) 1488 (declare (type (mod #x110000) code)) 1489 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 1490 (incf idx))))) 1491 :memory-decode-function 1492 (nfunction 1493 iso-8859-8-memory-decode 1494 (lambda (pointer noctets idx string) 1495 (do* ((i 0 (1+ i)) 1496 (index idx (1+ index))) 1497 ((>= i noctets) index) 1498 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1499 (declare (type (unsigned-byte 8) 1st-unit)) 1500 (setf (schar string i) 1501 (if (< 1st-unit #xa0) 1502 (code-char 1st-unit) 1503 (svref *iso-8859-8-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1504 :octets-in-string-function 1505 #'8-bit-fixed-width-octets-in-string 1506 :length-of-vector-encoding-function 1507 #'8-bit-fixed-width-length-of-vector-encoding 1508 :length-of-memory-encoding-function 1509 #'8-bit-fixed-width-length-of-memory-encoding 1510 :literal-char-code-limit #xa0 1511 ) 1512 1513 (defparameter *iso-8859-9-to-unicode* 1514 #( 1515 ;; #xd0 1516 #\u+011e #\u+00d1 #\u+00d2 #\u+00d3 #\u+00d4 #\u+00d5 #\u+00d6 #\u+00d7 1517 #\u+00d8 #\u+00d9 #\u+00da #\u+00db #\u+00dc #\u+0130 #\u+015e #\u+00df 1518 ;; #xe0 1519 #\u+00e0 #\u+00e1 #\u+00e2 #\u+00e3 #\u+00e4 #\u+00e5 #\u+00e6 #\u+00e7 1520 #\u+00e8 #\u+00e9 #\u+00ea #\u+00eb #\u+00ec #\u+00ed #\u+00ee #\u+00ef 1521 ;; #xf0 1522 #\u+011f #\u+00f1 #\u+00f2 #\u+00f3 #\u+00f4 #\u+00f5 #\u+00f6 #\u+00f7 1523 #\u+00f8 #\u+00f9 #\u+00fa #\u+00fb #\u+00fc #\u+0131 #\u+015f #\u+00ff 1524 )) 1525 1526 (defparameter *unicode-d0-100-to-iso-8859-9* 1527 #( 1528 nil #xd1 #xd2 #xd3 #xd4 #xd5 #xd6 #xd7 ; #xd0-#xd7 1529 #xd8 #xd9 #xda #xdb #xdc nil nil #xdf ; #xd8-#xdf 1530 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #xe0-#xe7 1531 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #xe8-#xef 1532 nil #xf1 #xf2 #xf3 #xf4 #xf5 #xf6 #xf7 ; #xf0-#xf7 1533 #xf8 #xf9 #xfa #xfb #xfc nil nil #xff ; #xf8-#xff 1534 )) 1535 1536 (defparameter *unicode-118-160-to-iso-8859-9* 1537 #( 1538 nil nil nil nil nil nil #xd0 #xf0 ; #x118-#x11f 1539 nil nil nil nil nil nil nil nil ; #x120-#x127 1540 nil nil nil nil nil nil nil nil ; #x128-#x12f 1541 #xdd #xfd nil nil nil nil nil nil ; #x130-#x137 1542 nil nil nil nil nil nil nil nil ; #x138-#x13f 1543 nil nil nil nil nil nil nil nil ; #x140-#x147 1544 nil nil nil nil nil nil nil nil ; #x148-#x14f 1545 nil nil nil nil nil nil nil nil ; #x150-#x157 1546 nil nil nil nil nil nil #xde #xfe ; #x158-#x15f 1547 )) 1548 1549 1550 (define-character-encoding :iso-8859-9 1551 "An 8-bit, fixed-width character encoding in which codes #x00-#xcf 1552 map to their Unicode equivalents and other codes map to other Unicode 1553 character values. Intended to provide most characters found in the 1554 Turkish alphabet." 1555 1556 :aliases '(:iso_8859-9 :latin5 :csISOLatin5 :iso-ir-148) 1557 :stream-encode-function 1558 (nfunction 1559 iso-8859-9-stream-encode 1560 (lambda (char write-function stream) 1561 (let* ((code (char-code char)) 1562 (c2 (cond ((< code #xd0) code) 1563 ((< code #x100) 1564 (svref *unicode-d0-100-to-iso-8859-9* 1565 (the fixnum (- code #xd0)))) 1566 ((and (>= code #x118) (< code #x160)) 1567 (svref *unicode-118-160-to-iso-8859-9* 1568 (the fixnum (- code #x118))))))) 1569 1570 (declare (type (mod #x110000) code)) 1571 (funcall write-function stream (or c2 (char-code #\Sub))) 1572 1))) 1573 :stream-decode-function 1574 (nfunction 1575 iso-8859-9-stream-decode 1576 (lambda (1st-unit next-unit-function stream) 1577 (declare (ignore next-unit-function stream) 1578 (type (unsigned-byte 8) 1st-unit)) 1579 (if (< 1st-unit #xa0) 1580 (code-char 1st-unit) 1581 (svref *iso-8859-9-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 1582 :vector-encode-function 1583 (nfunction 1584 iso-8859-9-vector-encode 1585 (lambda (string vector idx start end) 1586 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1587 (fixnum idx)) 1588 (do* ((i start (1+ i))) 1589 ((>= i end) idx) 1590 (let* ((char (schar string i)) 1591 (code (char-code char)) 1592 (c2 (cond ((< code #xd0) code) 1593 ((< code #x100) 1594 (svref *unicode-d0-100-to-iso-8859-9* 1595 (the fixnum (- code #xd0)))) 1596 ((and (>= code #x118) (< code #x160)) 1597 (svref *unicode-118-160-to-iso-8859-9* 1598 (the fixnum (- code #x118))))))) 1599 (declare (type (mod #x110000) code)) 1600 (setf (aref vector idx) (or c2 (char-code #\Sub))) 1601 (incf idx))))) 1602 :vector-decode-function 1603 (nfunction 1604 iso-8859-9-vector-decode 1605 (lambda (vector idx noctets string) 1606 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1607 (do* ((i 0 (1+ i)) 1608 (index idx (1+ index))) 1609 ((>= i noctets) index) 1610 (let* ((1st-unit (aref vector index))) 1611 (declare (type (unsigned-byte 8) 1st-unit)) 1612 (setf (schar string i) 1613 (if (< 1st-unit #xa0) 1614 (code-char 1st-unit) 1615 (svref *iso-8859-9-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1616 :memory-encode-function 1617 (nfunction 1618 iso-8859-9-memory-encode 1619 (lambda (string pointer idx start end) 1620 (do* ((i start (1+ i))) 1621 ((>= i end) idx) 1622 (let* ((code (char-code (schar string i))) 1623 (c2 (cond ((< code #xd0) code) 1624 ((< code #x100) 1625 (svref *unicode-d0-100-to-iso-8859-9* 1626 (the fixnum (- code #xd0)))) 1627 ((and (>= code #x118) (< code #x160)) 1628 (svref *unicode-118-160-to-iso-8859-9* 1629 (the fixnum (- code #x118))))))) 1630 (declare (type (mod #x110000) code)) 1631 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 1632 (incf idx))))) 1633 :memory-decode-function 1634 (nfunction 1635 iso-8859-9-memory-decode 1636 (lambda (pointer noctets idx string) 1637 (do* ((i 0 (1+ i)) 1638 (index idx (1+ index))) 1639 ((>= i noctets) index) 1640 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1641 (declare (type (unsigned-byte 8) 1st-unit)) 1642 (setf (schar string i) 1643 (if (< 1st-unit #xa0) 1644 (code-char 1st-unit) 1645 (svref *iso-8859-9-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1646 :octets-in-string-function 1647 #'8-bit-fixed-width-octets-in-string 1648 :length-of-vector-encoding-function 1649 #'8-bit-fixed-width-length-of-vector-encoding 1650 :length-of-memory-encoding-function 1651 #'8-bit-fixed-width-length-of-memory-encoding 1652 :literal-char-code-limit #xd0 1653 ) 1654 1655 (defparameter *iso-8859-10-to-unicode* 1656 #( 1657 ;; #xa0 1658 #\u+00a0 #\u+0104 #\u+0112 #\u+0122 #\u+012a #\u+0128 #\u+0136 #\u+00a7 1659 #\u+013b #\u+0110 #\u+0160 #\u+0166 #\u+017d #\u+00ad #\u+016a #\u+014a 1660 ;; #xb0 1661 #\u+00b0 #\u+0105 #\u+0113 #\u+0123 #\u+012b #\u+0129 #\u+0137 #\u+00b7 1662 #\u+013c #\u+0111 #\u+0161 #\u+0167 #\u+017e #\u+2015 #\u+016b #\u+014b 1663 ;; #xc0 1664 #\u+0100 #\u+00c1 #\u+00c2 #\u+00c3 #\u+00c4 #\u+00c5 #\u+00c6 #\u+012e 1665 #\u+010c #\u+00c9 #\u+0118 #\u+00cb #\u+0116 #\u+00cd #\u+00ce #\u+00cf 1666 ;; #xd0 1667 #\u+00d0 #\u+0145 #\u+014c #\u+00d3 #\u+00d4 #\u+00d5 #\u+00d6 #\u+0168 1668 #\u+00d8 #\u+0172 #\u+00da #\u+00db #\u+00dc #\u+00dd #\u+00de #\u+00df 1669 ;; #xe0 1670 #\u+0101 #\u+00e1 #\u+00e2 #\u+00e3 #\u+00e4 #\u+00e5 #\u+00e6 #\u+012f 1671 #\u+010d #\u+00e9 #\u+0119 #\u+00eb #\u+0117 #\u+00ed #\u+00ee #\u+00ef 1672 ;; #xf0 1673 #\u+00f0 #\u+0146 #\u+014d #\u+00f3 #\u+00f4 #\u+00f5 #\u+00f6 #\u+0169 1674 #\u+00f8 #\u+0173 #\u+00fa #\u+00fb #\u+00fc #\u+00fd #\u+00fe #\u+0138 1675 )) 1676 1677 (defparameter *unicode-a0-180-to-iso-8859-10* 1678 #( 1679 #xa0 nil nil nil nil nil nil #xa7 ; #xa0-#xa7 1680 nil nil nil nil nil #xad nil nil ; #xa8-#xaf 1681 #xb0 nil nil nil nil nil nil #xb7 ; #xb0-#xb7 1682 nil nil nil nil nil nil nil nil ; #xb8-#xbf 1683 nil #xc1 #xc2 #xc3 #xc4 #xc5 #xc6 nil ; #xc0-#xc7 1684 nil #xc9 nil #xcb nil #xcd #xce #xcf ; #xc8-#xcf 1685 #xd0 nil nil #xd3 #xd4 #xd5 #xd6 nil ; #xd0-#xd7 1686 #xd8 nil #xda #xdb #xdc #xdd #xde #xdf ; #xd8-#xdf 1687 nil #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 nil ; #xe0-#xe7 1688 nil #xe9 nil #xeb nil #xed #xee #xef ; #xe8-#xef 1689 #xf0 nil nil #xf3 #xf4 #xf5 #xf6 nil ; #xf0-#xf7 1690 #xf8 nil #xfa #xfb #xfc #xfd #xfe nil ; #xf8-#xff 1691 #xc0 #xe0 nil nil #xa1 #xb1 nil nil ; #x100-#x107 1692 nil nil nil nil #xc8 #xe8 nil nil ; #x108-#x10f 1693 #xa9 #xb9 #xa2 #xb2 nil nil #xcc #xec ; #x110-#x117 1694 #xca #xea nil nil nil nil nil nil ; #x118-#x11f 1695 nil nil #xa3 #xb3 nil nil nil nil ; #x120-#x127 1696 #xa5 #xb5 #xa4 #xb4 nil nil #xc7 #xe7 ; #x128-#x12f 1697 nil nil nil nil nil nil #xa6 #xb6 ; #x130-#x137 1698 #xff nil nil #xa8 #xb8 nil nil nil ; #x138-#x13f 1699 nil nil nil nil nil #xd1 #xf1 nil ; #x140-#x147 1700 nil nil #xaf #xbf #xd2 #xf2 nil nil ; #x148-#x14f 1701 nil nil nil nil nil nil nil nil ; #x150-#x157 1702 nil nil nil nil nil nil nil nil ; #x158-#x15f 1703 #xaa #xba nil nil nil nil #xab #xbb ; #x160-#x167 1704 #xd7 #xf7 #xae #xbe nil nil nil nil ; #x168-#x16f 1705 nil nil #xd9 #xf9 nil nil nil nil ; #x170-#x177 1706 nil nil nil nil nil #xac #xbc nil ; #x178-#x17f 1707 )) 1708 1709 (define-character-encoding :iso-8859-10 1710 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 1711 map to their Unicode equivalents and other codes map to other Unicode 1712 character values. Intended to provide most characters found in Nordic 1713 alphabets." 1714 1715 :aliases '(:iso_8859-10 :latin6 :csISOLatin6 :iso-ir-157) 1716 :stream-encode-function 1717 (nfunction 1718 iso-8859-10-stream-encode 1719 (lambda (char write-function stream) 1720 (let* ((code (char-code char)) 1721 (c2 (cond ((< code #xa0) code) 1722 ((< code #x180) 1723 (svref *unicode-a0-180-to-iso-8859-10* 1724 (the fixnum (- code #xa0))))))) 1725 (declare (type (mod #x110000) code)) 1726 (funcall write-function stream (or c2 (char-code #\Sub))) 1727 1))) 1728 :stream-decode-function 1729 (nfunction 1730 iso-8859-10-stream-decode 1731 (lambda (1st-unit next-unit-function stream) 1732 (declare (ignore next-unit-function stream) 1733 (type (unsigned-byte 8) 1st-unit)) 1734 (if (< 1st-unit #xa0) 1735 (code-char 1st-unit) 1736 (svref *iso-8859-10-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 1737 :vector-encode-function 1738 (nfunction 1739 iso-8859-10-vector-encode 1740 (lambda (string vector idx start end) 1741 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1742 (fixnum idx)) 1743 (do* ((i start (1+ i))) 1744 ((>= i end) idx) 1745 (let* ((char (schar string i)) 1746 (code (char-code char)) 1747 (c2 (cond ((< code #xa0) code) 1748 ((< code #x180) 1749 (svref *unicode-a0-180-to-iso-8859-10* 1750 (the fixnum (- code #xa0))))))) 1751 (declare (type (mod #x110000) code)) 1752 (setf (aref vector idx) (or c2 (char-code #\Sub))) 1753 (incf idx))))) 1754 :vector-decode-function 1755 (nfunction 1756 iso-8859-10-vector-decode 1757 (lambda (vector idx noctets string) 1758 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1759 (do* ((i 0 (1+ i)) 1760 (index idx (1+ index))) 1761 ((>= i noctets) index) 1762 (let* ((1st-unit (aref vector index))) 1763 (declare (type (unsigned-byte 8) 1st-unit)) 1764 (setf (schar string i) 1765 (if (< 1st-unit #xa0) 1766 (code-char 1st-unit) 1767 (svref *iso-8859-10-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1768 :memory-encode-function 1769 (nfunction 1770 iso-8859-10-memory-encode 1771 (lambda (string pointer idx start end) 1772 (do* ((i start (1+ i))) 1773 ((>= i end) idx) 1774 (let* ((code (char-code (schar string i))) 1775 (c2 (cond ((< code #xa0) code) 1776 ((< code #x180) 1777 (svref *unicode-a0-180-to-iso-8859-10* 1778 (the fixnum (- code #xa0))))))) 1779 (declare (type (mod #x110000) code)) 1780 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 1781 (incf idx))))) 1782 :memory-decode-function 1783 (nfunction 1784 iso-8859-10-memory-decode 1785 (lambda (pointer noctets idx string) 1786 (do* ((i 0 (1+ i)) 1787 (index idx (1+ index))) 1788 ((>= i noctets) index) 1789 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1790 (declare (type (unsigned-byte 8) 1st-unit)) 1791 (setf (schar string i) 1792 (if (< 1st-unit #xa0) 1793 (code-char 1st-unit) 1794 (svref *iso-8859-10-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 1795 :octets-in-string-function 1796 #'8-bit-fixed-width-octets-in-string 1797 :length-of-vector-encoding-function 1798 #'8-bit-fixed-width-length-of-vector-encoding 1799 :length-of-memory-encoding-function 1800 #'8-bit-fixed-width-length-of-memory-encoding 1801 :literal-char-code-limit #xa0 1802 ) 1803 1804 (define-character-encoding :iso-8859-11 1805 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 1806 map to their Unicode equivalents and other codes map to other Unicode 1807 character values. Intended to provide most characters found the Thai 1808 alphabet." 1809 :aliases '() 1810 :stream-encode-function 1811 (nfunction 1812 iso-8859-11-stream-encode 1813 (lambda (char write-function stream) 1814 (let* ((code (char-code char)) 1815 (c2 (cond ((< code #xa1) code) 1816 ((and (<= code #xfb) 1817 (not (and (>= code #xdb) (<= code #xde)))) 1818 (+ code #x0d60))))) 1819 (declare (type (mod #x110000) code)) 1820 (funcall write-function stream (or c2 (char-code #\Sub))) 1821 1))) 1822 :stream-decode-function 1823 (nfunction 1824 iso-8859-11-stream-decode 1825 (lambda (1st-unit next-unit-function stream) 1826 (declare (ignore next-unit-function stream) 1827 (type (unsigned-byte 8) 1st-unit)) 1828 (if (< 1st-unit #xa1) 1829 (code-char 1st-unit) 1830 (if (and (>= 1st-unit #xe01) 1831 (<= 1st-unit #xe5b) 1832 (not (and (>= 1st-unit #xe3b) 1833 (<= 1st-unit #xe3e)))) 1834 (code-char (- 1st-unit #xd60)) 1835 #\Replacement_Character)))) 1836 :vector-encode-function 1837 (nfunction 1838 iso-8859-11-vector-encode 1839 (lambda (string vector idx start end) 1840 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1841 (fixnum idx)) 1842 (do* ((i start (1+ i))) 1843 ((>= i end) idx) 1844 (let* ((char (schar string i)) 1845 (code (char-code char)) 1846 (c2 (cond ((< code #xa1) code) 1847 ((and (<= code #xfb) 1848 (not (and (>= code #xdb) (<= code #xde)))) 1849 (+ code #x0d60))))) 1850 (declare (type (mod #x110000) code)) 1851 (setf (aref vector idx) (or c2 (char-code #\Sub))) 1852 (incf idx))))) 1853 :vector-decode-function 1854 (nfunction 1855 iso-8859-11-vector-decode 1856 (lambda (vector idx noctets string) 1857 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1858 (do* ((i 0 (1+ i)) 1859 (index idx (1+ index))) 1860 ((>= i noctets) index) 1861 (let* ((1st-unit (aref vector index))) 1862 (declare (type (unsigned-byte 8) 1st-unit)) 1863 (setf (schar string i) 1864 (if (< 1st-unit #xa1) 1865 (code-char 1st-unit) 1866 (if (and (>= 1st-unit #xe01) 1867 (<= 1st-unit #xe5b) 1868 (not (and (>= 1st-unit #xe3b) 1869 (<= 1st-unit #xe3e)))) 1870 (code-char (- 1st-unit #xd60)) 1871 #\Replacement_Character))))))) 1872 :memory-encode-function 1873 (nfunction 1874 iso-8859-11-memory-encode 1875 (lambda (string pointer idx start end) 1876 (do* ((i start (1+ i))) 1877 ((>= i end) idx) 1878 (let* ((code (char-code (schar string i))) 1879 (c2 (cond ((< code #xa1) code) 1880 ((and (<= code #xfb) 1881 (not (and (>= code #xdb) (<= code #xde)))) 1882 (+ code #x0d60))))) 1883 (declare (type (mod #x110000) code)) 1884 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 1885 (incf idx))))) 1886 :memory-decode-function 1887 (nfunction 1888 iso-8859-11-memory-decode 1889 (lambda (pointer noctets idx string) 1890 (do* ((i 0 (1+ i)) 1891 (index idx (1+ index))) 1892 ((>= i noctets) index) 1893 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1894 (declare (type (unsigned-byte 8) 1st-unit)) 1895 (setf (schar string i) 1896 (if (< 1st-unit #xa1) 1897 (code-char 1st-unit) 1898 (if (and (>= 1st-unit #xe01) 1899 (<= 1st-unit #xe5b) 1900 (not (and (>= 1st-unit #xe3b) 1901 (<= 1st-unit #xe3e)))) 1902 (code-char (- 1st-unit #xd60)) 1903 #\Replacement_Character))))))) 1904 :octets-in-string-function 1905 #'8-bit-fixed-width-octets-in-string 1906 :length-of-vector-encoding-function 1907 #'8-bit-fixed-width-length-of-vector-encoding 1908 :length-of-memory-encoding-function 1909 #'8-bit-fixed-width-length-of-memory-encoding 1910 :literal-char-code-limit #xa0 1911 ) 1912 1913 ;;; There is no iso-8859-12 encoding. 1914 1915 (defparameter *iso-8859-13-to-unicode* 1916 #( 1917 ;; #xa0 1918 #\u+00a0 #\u+201d #\u+00a2 #\u+00a3 #\u+00a4 #\u+201e #\u+00a6 #\u+00a7 1919 #\u+00d8 #\u+00a9 #\u+0156 #\u+00ab #\u+00ac #\u+00ad #\u+00ae #\u+00c6 1920 ;; #xb0 1921 #\u+00b0 #\u+00b1 #\u+00b2 #\u+00b3 #\u+201c #\u+00b5 #\u+00b6 #\u+00b7 1922 #\u+00f8 #\u+00b9 #\u+0157 #\u+00bb #\u+00bc #\u+00bd #\u+00be #\u+00e6 1923 ;; #xc0 1924 #\u+0104 #\u+012e #\u+0100 #\u+0106 #\u+00c4 #\u+00c5 #\u+0118 #\u+0112 1925 #\u+010c #\u+00c9 #\u+0179 #\u+0116 #\u+0122 #\u+0136 #\u+012a #\u+013b 1926 ;; #xd0 1927 #\u+0160 #\u+0143 #\u+0145 #\u+00d3 #\u+014c #\u+00d5 #\u+00d6 #\u+00d7 1928 #\u+0172 #\u+0141 #\u+015a #\u+016a #\u+00dc #\u+017b #\u+017d #\u+00df 1929 ;; #xe0 1930 #\u+0105 #\u+012f #\u+0101 #\u+0107 #\u+00e4 #\u+00e5 #\u+0119 #\u+0113 1931 #\u+010d #\u+00e9 #\u+017a #\u+0117 #\u+0123 #\u+0137 #\u+012b #\u+013c 1932 ;; #xf0 1933 #\u+0161 #\u+0144 #\u+0146 #\u+00f3 #\u+014d #\u+00f5 #\u+00f6 #\u+00f7 1934 #\u+0173 #\u+0142 #\u+015b #\u+016b #\u+00fc #\u+017c #\u+017e #\u+2019 1935 )) 1936 1937 (defparameter *unicode-a0-180-to-iso-8859-13* 1938 #( 1939 #xa0 nil #xa2 #xa3 #xa4 nil #xa6 #xa7 ; #xa0-#xa7 1940 nil #xa9 nil #xab #xac #xad #xae nil ; #xa8-#xaf 1941 #xb0 #xb1 #xb2 #xb3 nil #xb5 #xb6 #xb7 ; #xb0-#xb7 1942 nil #xb9 nil #xbb #xbc #xbd #xbe nil ; #xb8-#xbf 1943 nil nil nil nil #xc4 #xc5 #xaf nil ; #xc0-#xc7 1944 nil #xc9 nil nil nil nil nil nil ; #xc8-#xcf 1945 nil nil nil #xd3 nil #xd5 #xd6 #xd7 ; #xd0-#xd7 1946 #xa8 nil nil nil #xdc nil nil #xdf ; #xd8-#xdf 1947 nil nil nil nil #xe4 #xe5 #xbf nil ; #xe0-#xe7 1948 nil #xe9 nil nil nil nil nil nil ; #xe8-#xef 1949 nil nil nil #xf3 nil #xf5 #xf6 #xf7 ; #xf0-#xf7 1950 #xb8 nil nil nil #xfc nil nil nil ; #xf8-#xff 1951 #xc2 #xe2 nil nil #xc0 #xe0 #xc3 #xe3 ; #x100-#x107 1952 nil nil nil nil #xc8 #xe8 nil nil ; #x108-#x10f 1953 nil nil #xc7 #xe7 nil nil #xcb #xeb ; #x110-#x117 1954 #xc6 #xe6 nil nil nil nil nil nil ; #x118-#x11f 1955 nil nil #xcc #xec nil nil nil nil ; #x120-#x127 1956 nil nil #xce #xee nil nil #xc1 #xe1 ; #x128-#x12f 1957 nil nil nil nil nil nil #xcd #xed ; #x130-#x137 1958 nil nil nil #xcf #xef nil nil nil ; #x138-#x13f 1959 nil #xd9 #xf9 #xd1 #xf1 #xd2 #xf2 nil ; #x140-#x147 1960 nil nil nil nil #xd4 #xf4 nil nil ; #x148-#x14f 1961 nil nil nil nil nil nil #xaa #xba ; #x150-#x157 1962 nil nil #xda #xfa nil nil nil nil ; #x158-#x15f 1963 #xd0 #xf0 nil nil nil nil nil nil ; #x160-#x167 1964 nil nil #xdb #xfb nil nil nil nil ; #x168-#x16f 1965 nil nil #xd8 #xf8 nil nil nil nil ; #x170-#x177 1966 nil #xca #xea #xdd #xfd #xde #xfe nil ; #x178-#x17f 1967 )) 1968 1969 (defparameter *unicode-2018-2020-to-iso-8859-13* 1970 #( 1971 nil #xff nil nil #xb4 #xa1 #xa5 nil ; #x2018-#x201f */ 1972 )) 1973 1974 1975 (define-character-encoding :iso-8859-13 1976 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 1977 map to their Unicode equivalents and other codes map to other Unicode 1978 character values. Intended to provide most characters found in Baltic 1979 alphabets." 1980 1981 :aliases '() 1982 :stream-encode-function 1983 (nfunction 1984 iso-8859-13-stream-encode 1985 (lambda (char write-function stream) 1986 (let* ((code (char-code char)) 1987 (c2 (cond ((< code #xa0) code) 1988 ((< code #x180) 1989 (svref *unicode-a0-180-to-iso-8859-13* 1990 (the fixnum (- code #xa0)))) 1991 ((and (>= code #x2018) 1992 (< code #x2020)) 1993 (svref *unicode-2018-2020-to-iso-8859-13* 1994 (the fixnum (- code #x2018))))))) 1995 (declare (type (mod #x110000) code)) 1996 (funcall write-function stream (or c2 (char-code #\Sub))) 1997 1))) 1998 :stream-decode-function 1999 (nfunction 2000 iso-8859-13-stream-decode 2001 (lambda (1st-unit next-unit-function stream) 2002 (declare (ignore next-unit-function stream) 2003 (type (unsigned-byte 8) 1st-unit)) 2004 (if (< 1st-unit #xa0) 2005 (code-char 1st-unit) 2006 (svref *iso-8859-13-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 2007 :vector-encode-function 2008 (nfunction 2009 iso-8859-13-vector-encode 2010 (lambda (string vector idx start end) 2011 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2012 (fixnum idx)) 2013 (do* ((i start (1+ i))) 2014 ((>= i end) idx) 2015 (let* ((char (schar string i)) 2016 (code (char-code char)) 2017 (c2 (cond ((< code #xa0) code) 2018 ((< code #x180) 2019 (svref *unicode-a0-180-to-iso-8859-13* 2020 (the fixnum (- code #xa0)))) 2021 ((and (>= code #x2018) 2022 (< code #x2020)) 2023 (svref *unicode-2018-2020-to-iso-8859-13* 2024 (the fixnum (- code #x2018))))))) 2025 (declare (type (mod #x110000) code)) 2026 (setf (aref vector idx) (or c2 (char-code #\Sub))) 2027 (incf idx))))) 2028 :vector-decode-function 2029 (nfunction 2030 iso-8859-13-vector-decode 2031 (lambda (vector idx noctets string) 2032 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 2033 (do* ((i 0 (1+ i)) 2034 (index idx (1+ index))) 2035 ((>= i noctets) index) 2036 (let* ((1st-unit (aref vector index))) 2037 (declare (type (unsigned-byte 8) 1st-unit)) 2038 (setf (schar string i) 2039 (if (< 1st-unit #xa0) 2040 (code-char 1st-unit) 2041 (svref *iso-8859-13-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2042 :memory-encode-function 2043 (nfunction 2044 iso-8859-13-memory-encode 2045 (lambda (string pointer idx start end) 2046 (do* ((i start (1+ i))) 2047 ((>= i end) idx) 2048 (let* ((code (char-code (schar string i))) 2049 (c2 (cond ((< code #xa0) code) 2050 ((< code #x180) 2051 (svref *unicode-a0-180-to-iso-8859-13* 2052 (the fixnum (- code #xa0)))) 2053 ((and (>= code #x2018) 2054 (< code #x2020)) 2055 (svref *unicode-2018-2020-to-iso-8859-13* 2056 (the fixnum (- code #x2018))))))) 2057 (declare (type (mod #x110000) code)) 2058 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 2059 (incf idx))))) 2060 :memory-decode-function 2061 (nfunction 2062 iso-8859-13-memory-decode 2063 (lambda (pointer noctets idx string) 2064 (do* ((i 0 (1+ i)) 2065 (index idx (1+ index))) 2066 ((>= i noctets) index) 2067 (let* ((1st-unit (%get-unsigned-byte pointer index))) 2068 (declare (type (unsigned-byte 8) 1st-unit)) 2069 (setf (schar string i) 2070 (if (< 1st-unit #xa0) 2071 (code-char 1st-unit) 2072 (svref *iso-8859-13-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2073 :octets-in-string-function 2074 #'8-bit-fixed-width-octets-in-string 2075 :length-of-vector-encoding-function 2076 #'8-bit-fixed-width-length-of-vector-encoding 2077 :length-of-memory-encoding-function 2078 #'8-bit-fixed-width-length-of-memory-encoding 2079 :literal-char-code-limit #xa0 2080 ) 2081 2082 (defparameter *iso-8859-14-to-unicode* 2083 #( 2084 ;; #xa0 2085 #\u+00a0 #\u+1e02 #\u+1e03 #\u+00a3 #\u+010a #\u+010b #\u+1e0a #\u+00a7 2086 #\u+1e80 #\u+00a9 #\u+1e82 #\u+1e0b #\u+1ef2 #\u+00ad #\u+00ae #\u+0178 2087 ;; #xb0 2088 #\u+1e1e #\u+1e1f #\u+0120 #\u+0121 #\u+1e40 #\u+1e41 #\u+00b6 #\u+1e56 2089 #\u+1e81 #\u+1e57 #\u+1e83 #\u+1e60 #\u+1ef3 #\u+1e84 #\u+1e85 #\u+1e61 2090 ;; #xc0 2091 #\u+00c0 #\u+00c1 #\u+00c2 #\u+00c3 #\u+00c4 #\u+00c5 #\u+00c6 #\u+00c7 2092 #\u+00c8 #\u+00c9 #\u+00ca #\u+00cb #\u+00cc #\u+00cd #\u+00ce #\u+00cf 2093 ;; #xd0 2094 #\u+0174 #\u+00d1 #\u+00d2 #\u+00d3 #\u+00d4 #\u+00d5 #\u+00d6 #\u+1e6a 2095 #\u+00d8 #\u+00d9 #\u+00da #\u+00db #\u+00dc #\u+00dd #\u+0176 #\u+00df 2096 ;; #xe0 2097 #\u+00e0 #\u+00e1 #\u+00e2 #\u+00e3 #\u+00e4 #\u+00e5 #\u+00e6 #\u+00e7 2098 #\u+00e8 #\u+00e9 #\u+00ea #\u+00eb #\u+00ec #\u+00ed #\u+00ee #\u+00ef 2099 ;; #xf0 2100 #\u+0175 #\u+00f1 #\u+00f2 #\u+00f3 #\u+00f4 #\u+00f5 #\u+00f6 #\u+1e6b 2101 #\u+00f8 #\u+00f9 #\u+00fa #\u+00fb #\u+00fc #\u+00fd #\u+0177 #\u+00ff 2102 )) 2103 2104 (defparameter *unicode-a0-100-to-iso-8859-14* 2105 #( 2106 #xa0 nil nil #xa3 nil nil nil #xa7 ; #xa0-#xa7 2107 nil #xa9 nil nil nil #xad #xae nil ; #xa8-#xaf 2108 nil nil nil nil nil nil #xb6 nil ; #xb0-#xb7 2109 nil nil nil nil nil nil nil nil ; #xb8-#xbf 2110 #xc0 #xc1 #xc2 #xc3 #xc4 #xc5 #xc6 #xc7 ; #xc0-#xc7 2111 #xc8 #xc9 #xca #xcb #xcc #xcd #xce #xcf ; #xc8-#xcf 2112 nil #xd1 #xd2 #xd3 #xd4 #xd5 #xd6 nil ; #xd0-#xd7 2113 #xd8 #xd9 #xda #xdb #xdc #xdd nil #xdf ; #xd8-#xdf 2114 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #xe0-#xe7 2115 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #xe8-#xef 2116 nil #xf1 #xf2 #xf3 #xf4 #xf5 #xf6 nil ; #xf0-#xf7 2117 #xf8 #xf9 #xfa #xfb #xfc #xfd nil #xff ; #xf8-#xff 2118 )) 2119 2120 (defparameter *unicode-108-128-to-iso-8859-14* 2121 #( 2122 nil nil #xa4 #xa5 nil nil nil nil ; #x108-#x10f 2123 nil nil nil nil nil nil nil nil ; #x110-#x117 2124 nil nil nil nil nil nil nil nil ; #x118-#x11f 2125 #xb2 #xb3 nil nil nil nil nil nil ; #x120-#x127 2126 )) 2127 2128 (defparameter *unicode-170-180-to-iso-8859-14* 2129 #( 2130 nil nil nil nil #xd0 #xf0 #xde #xfe ; #x170-#x177 2131 #xaf nil nil nil nil nil nil nil ; #x178-#x17f 2132 )) 2133 2134 (defparameter *unicode-1e00-1e88-to-iso-8859-14* 2135 #( 2136 nil nil #xa1 #xa2 nil nil nil nil ; #x1e00-#x1e07 2137 nil nil #xa6 #xab nil nil nil nil ; #x1e08-#x1e0f 2138 nil nil nil nil nil nil nil nil ; #x1e10-#x1e17 2139 nil nil nil nil nil nil #xb0 #xb1 ; #x1e18-#x1e1f 2140 nil nil nil nil nil nil nil nil ; #x1e20-#x1e27 2141 nil nil nil nil nil nil nil nil ; #x1e28-#x1e2f 2142 nil nil nil nil nil nil nil nil ; #x1e30-#x1e37 2143 nil nil nil nil nil nil nil nil ; #x1e38-#x1e3f 2144 #xb4 #xb5 nil nil nil nil nil nil ; #x1e40-#x1e47 2145 nil nil nil nil nil nil nil nil ; #x1e48-#x1e4f 2146 nil nil nil nil nil nil #xb7 #xb9 ; #x1e50-#x1e57 2147 nil nil nil nil nil nil nil nil ; #x1e58-#x1e5f 2148 #xbb #xbf nil nil nil nil nil nil ; #x1e60-#x1e67 2149 nil nil #xd7 #xf7 nil nil nil nil ; #x1e68-#x1e6f 2150 nil nil nil nil nil nil nil nil ; #x1e70-#x1e77 2151 nil nil nil nil nil nil nil nil ; #x1e78-#x1e7f 2152 #xa8 #xb8 #xaa #xba #xbd #xbe nil nil ; #x1e80-#x1e87 2153 )) 2154 2155 (defparameter *unicode-1ef0-1ef8-to-iso-8859-14* 2156 #( 2157 nil nil #xac #xbc nil nil nil nil ; #x1ef0-#x1ef7 2158 )) 2159 2160 (define-character-encoding :iso-8859-14 2161 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 2162 map to their Unicode equivalents and other codes map to other Unicode 2163 character values. Intended to provide most characters found in Celtic 2164 languages." 2165 :aliases '(:iso_8859-14 :iso-ir-199 :latin8 :l8 :iso-celtic) 2166 :stream-encode-function 2167 (nfunction 2168 iso-8859-14-stream-encode 2169 (lambda (char write-function stream) 2170 (let* ((code (char-code char)) 2171 (c2 (cond ((< code #xa0) code) 2172 ((< code #x100) 2173 (svref *unicode-a0-100-to-iso-8859-14* 2174 (the fixnum (- code #xa0)))) 2175 ((and (>= code #x108) (< code #x128)) 2176 (svref *unicode-108-128-to-iso-8859-14* 2177 (the fixnum (- code #x108)))) 2178 ((and (>= code #x170) (< code #x180)) 2179 (svref *unicode-170-180-to-iso-8859-14* 2180 (the fixnum (- code #x170)))) 2181 ((and (>= code #x1e00) (< code #x1e88)) 2182 (svref *unicode-1e00-1e88-to-iso-8859-14* 2183 (the fixnum (- code #x1e00)))) 2184 ((and (>= code #x1ef0) (< code #x1ef8)) 2185 (svref *unicode-1ef0-1ef8-to-iso-8859-14* 2186 (the fixnum (- code #x1ef0))))))) 2187 (declare (type (mod #x110000) code)) 2188 (funcall write-function stream (or c2 (char-code #\Sub))) 2189 1))) 2190 :stream-decode-function 2191 (nfunction 2192 iso-8859-14-stream-decode 2193 (lambda (1st-unit next-unit-function stream) 2194 (declare (ignore next-unit-function stream) 2195 (type (unsigned-byte 8) 1st-unit)) 2196 (if (< 1st-unit #xa0) 2197 (code-char 1st-unit) 2198 (svref *iso-8859-14-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 2199 :vector-encode-function 2200 (nfunction 2201 iso-8859-14-vector-encode 2202 (lambda (string vector idx start end) 2203 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2204 (fixnum idx)) 2205 (do* ((i start (1+ i))) 2206 ((>= i end) idx) 2207 (let* ((char (schar string i)) 2208 (code (char-code char)) 2209 (c2 (cond ((< code #xa0) code) 2210 ((< code #x100) 2211 (svref *unicode-a0-100-to-iso-8859-14* 2212 (the fixnum (- code #xa0)))) 2213 ((and (>= code #x108) (< code #x128)) 2214 (svref *unicode-108-128-to-iso-8859-14* 2215 (the fixnum (- code #x108)))) 2216 ((and (>= code #x170) (< code #x180)) 2217 (svref *unicode-170-180-to-iso-8859-14* 2218 (the fixnum (- code #x170)))) 2219 ((and (>= code #x1e00) (< code #x1e88)) 2220 (svref *unicode-1e00-1e88-to-iso-8859-14* 2221 (the fixnum (- code #x1e00)))) 2222 ((and (>= code #x1ef0) (< code #x1ef8)) 2223 (svref *unicode-1ef0-1ef8-to-iso-8859-14* 2224 (the fixnum (- code #x1ef0))))))) 2225 (declare (type (mod #x110000) code)) 2226 (setf (aref vector idx) (or c2 (char-code #\Sub))) 2227 (incf idx))))) 2228 :vector-decode-function 2229 (nfunction 2230 iso-8859-14-vector-decode 2231 (lambda (vector idx noctets string) 2232 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 2233 (do* ((i 0 (1+ i)) 2234 (index idx (1+ index))) 2235 ((>= i noctets) index) 2236 (let* ((1st-unit (aref vector index))) 2237 (declare (type (unsigned-byte 8) 1st-unit)) 2238 (setf (schar string i) 2239 (if (< 1st-unit #xa0) 2240 (code-char 1st-unit) 2241 (svref *iso-8859-14-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2242 :memory-encode-function 2243 (nfunction 2244 iso-8859-14-memory-encode 2245 (lambda (string pointer idx start end) 2246 (do* ((i start (1+ i))) 2247 ((>= i end) idx) 2248 (let* ((code (char-code (schar string i))) 2249 (c2 (cond ((< code #xa0) code) 2250 ((< code #x100) 2251 (svref *unicode-a0-100-to-iso-8859-14* 2252 (the fixnum (- code #xa0)))) 2253 ((and (>= code #x108) (< code #x128)) 2254 (svref *unicode-108-128-to-iso-8859-14* 2255 (the fixnum (- code #x108)))) 2256 ((and (>= code #x170) (< code #x180)) 2257 (svref *unicode-170-180-to-iso-8859-14* 2258 (the fixnum (- code #x170)))) 2259 ((and (>= code #x1e00) (< code #x1e88)) 2260 (svref *unicode-1e00-1e88-to-iso-8859-14* 2261 (the fixnum (- code #x1e00)))) 2262 ((and (>= code #x1ef0) (< code #x1ef8)) 2263 (svref *unicode-1ef0-1ef8-to-iso-8859-14* 2264 (the fixnum (- code #x1ef0))))))) 2265 (declare (type (mod #x110000) code)) 2266 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 2267 (incf idx))))) 2268 :memory-decode-function 2269 (nfunction 2270 iso-8859-14-memory-decode 2271 (lambda (pointer noctets idx string) 2272 (do* ((i 0 (1+ i)) 2273 (index idx (1+ index))) 2274 ((>= i noctets) index) 2275 (let* ((1st-unit (%get-unsigned-byte pointer index))) 2276 (declare (type (unsigned-byte 8) 1st-unit)) 2277 (setf (schar string i) 2278 (if (< 1st-unit #xa0) 2279 (code-char 1st-unit) 2280 (svref *iso-8859-14-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2281 :octets-in-string-function 2282 #'8-bit-fixed-width-octets-in-string 2283 :length-of-vector-encoding-function 2284 #'8-bit-fixed-width-length-of-vector-encoding 2285 :length-of-memory-encoding-function 2286 #'8-bit-fixed-width-length-of-memory-encoding 2287 :literal-char-code-limit #xa0 2288 ) 2289 2290 (defparameter *iso-8859-15-to-unicode* 2291 #( 2292 ;; #xa0 2293 #\u+00a0 #\u+00a1 #\u+00a2 #\u+00a3 #\u+20ac #\u+00a5 #\u+0160 #\u+00a7 2294 #\u+0161 #\u+00a9 #\u+00aa #\u+00ab #\u+00ac #\u+00ad #\u+00ae #\u+00af 2295 ;; #xb0 2296 #\u+00b0 #\u+00b1 #\u+00b2 #\u+00b3 #\u+017d #\u+00b5 #\u+00b6 #\u+00b7 2297 #\u+017e #\u+00b9 #\u+00ba #\u+00bb #\u+0152 #\u+0153 #\u+0178 #\u+00bf 2298 ;; #xc0 2299 #\u+00c0 #\u+00c1 #\u+00c2 #\u+00c3 #\u+00c4 #\u+00c5 #\u+00c6 #\u+00c7 2300 ;; #xc8 2301 #\u+00c8 #\u+00c9 #\u+00ca #\u+00cb #\u+00cc #\u+00cd #\u+00ce #\u+00cf 2302 ;; #xd0 2303 #\u+00d0 #\u+00d1 #\u+00d2 #\u+00d3 #\u+00d4 #\u+00d5 #\u+00d6 #\u+00d7 2304 ;; #xd8 2305 #\u+00d8 #\u+00d9 #\u+00da #\u+00db #\u+00dc #\u+00dd #\u+00de #\u+00df 2306 ;; #xe0 2307 #\u+00e0 #\u+00e1 #\u+00e2 #\u+00e3 #\u+00e4 #\u+00e5 #\u+00e6 #\u+00e7 2308 ;; #xe8 2309 #\u+00e8 #\u+00e9 #\u+00ea #\u+00eb #\u+00ec #\u+00ed #\u+00ee #\u+00ef 2310 ;; #xf0 2311 #\u+00f0 #\u+00f1 #\u+00f2 #\u+00f3 #\u+00f4 #\u+00f5 #\u+00f6 #\u+00f7 2312 ;; #xf8 2313 #\u+00f8 #\u+00f9 #\u+00fa #\u+00fb #\u+00fc #\u+00fd #\u+00fe #\u+00ff 2314 )) 2315 2316 (defparameter *unicode-a0-100-to-iso-8859-15* 2317 #( 2318 #xa0 #xa1 #xa2 #xa3 nil #xa5 nil #xa7 ; #xa0-#xa7 2319 nil #xa9 #xaa #xab #xac #xad #xae #xaf ; #xa8-#xaf 2320 #xb0 #xb1 #xb2 #xb3 nil #xb5 #xb6 #xb7 ; #xb0-#xb7 2321 nil #xb9 #xba #xbb nil nil nil #xbf ; #xb8-0xbf 2322 #xc0 #xc1 #xc2 #xc3 #xc4 #xc5 #xc6 #xc7 ; #xc0-#xc7 2323 #xc8 #xc9 #xca #xcb #xcc #xcd #xce #xcf ; #xc8-#xcf 2324 #xd0 #xd1 #xd2 #xd3 #xd4 #xd5 #xd6 #xd7 ; #xd0-#xd7 2325 #xd8 #xd9 #xda #xdb #xdc #xdd #xde #xdf ; #xd8-#xdf 2326 #xe0 #xe1 #xe2 #xe3 #xe4 #xe5 #xe6 #xe7 ; #xe0-#xe7 2327 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #xe8-#xef 2328 #xf0 #xf1 #xf2 #xf3 #xf4 #xf5 #xf6 #xf7 ; #xf0-#xf7 2329 #xf8 #xf9 #xfa #xfb #xfc #xfd #xfe #xff ; #xf8-#xff 2330 )) 2331 2332 (defparameter *unicode-150-180-to-iso-8859-15* 2333 #( 2334 nil nil #xbc #xbd nil nil nil nil ; #x150-#x157 2335 nil nil nil nil nil nil nil nil ; #x158-#x15f 2336 #xa6 #xa8 nil nil nil nil nil nil ; #x160-#x167 2337 nil nil nil nil nil nil nil nil ; #x168-#x16f 2338 nil nil nil nil nil nil nil nil ; #x170-#x177 2339 #xbe nil nil nil nil #xb4 #xb8 nil ; #x178-#x17f 2340 )) 2341 2342 (define-character-encoding :iso-8859-15 2343 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 2344 map to their Unicode equivalents and other codes map to other Unicode 2345 character values. Intended to provide most characters found in Western 2346 European languages (including the Euro sign and some other characters 2347 missing from ISO-8859-1." 2348 :aliases '(:iso_8859-15 :latin9) 2349 :stream-encode-function 2350 (nfunction 2351 iso-8859-15-stream-encode 2352 (lambda (char write-function stream) 2353 (let* ((code (char-code char)) 2354 (c2 (cond ((< code #xa0) code) 2355 ((< code #x100) 2356 (svref *unicode-a0-100-to-iso-8859-15* 2357 (the fixnum (- code #xa0)))) 2358 ((and (>= code #x150) (< code #x180)) 2359 (svref *unicode-150-180-to-iso-8859-15* 2360 (the fixnum (- code #x150)))) 2361 ((= code #x20ac) #xa4)))) 2362 (declare (type (mod #x110000) code)) 2363 (funcall write-function stream (or c2 (char-code #\Sub))) 2364 1))) 2365 :stream-decode-function 2366 (nfunction 2367 iso-8859-15-stream-decode 2368 (lambda (1st-unit next-unit-function stream) 2369 (declare (ignore next-unit-function stream) 2370 (type (unsigned-byte 8) 1st-unit)) 2371 (if (< 1st-unit #xa0) 2372 (code-char 1st-unit) 2373 (svref *iso-8859-15-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 2374 :vector-encode-function 2375 (nfunction 2376 iso-8859-15-vector-encode 2377 (lambda (string vector idx start end) 2378 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2379 (fixnum idx)) 2380 (do* ((i start (1+ i))) 2381 ((>= i end) idx) 2382 (let* ((char (schar string i)) 2383 (code (char-code char)) 2384 (c2 (cond ((< code #xa0) code) 2385 ((< code #x100) 2386 (svref *unicode-a0-100-to-iso-8859-15* 2387 (the fixnum (- code #xa0)))) 2388 ((and (>= code #x150) (< code #x180)) 2389 (svref *unicode-150-180-to-iso-8859-15* 2390 (the fixnum (- code #x150)))) 2391 ((= code #x20ac) #xa4)))) 2392 (declare (type (mod #x110000) code)) 2393 (setf (aref vector idx) (or c2 (char-code #\Sub))) 2394 (incf idx))))) 2395 :vector-decode-function 2396 (nfunction 2397 iso-8859-15-vector-decode 2398 (lambda (vector idx noctets string) 2399 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 2400 (do* ((i 0 (1+ i)) 2401 (index idx (1+ index))) 2402 ((>= i noctets) index) 2403 (let* ((1st-unit (aref vector index))) 2404 (declare (type (unsigned-byte 8) 1st-unit)) 2405 (setf (schar string i) 2406 (if (< 1st-unit #xa0) 2407 (code-char 1st-unit) 2408 (svref *iso-8859-15-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2409 :memory-encode-function 2410 (nfunction 2411 iso-8859-15-memory-encode 2412 (lambda (string pointer idx start end) 2413 (do* ((i start (1+ i))) 2414 ((>= i end) idx) 2415 (let* ((code (char-code (schar string i))) 2416 (c2 (cond ((< code #xa0) code) 2417 ((< code #x100) 2418 (svref *unicode-a0-100-to-iso-8859-15* 2419 (the fixnum (- code #xa0)))) 2420 ((and (>= code #x150) (< code #x180)) 2421 (svref *unicode-150-180-to-iso-8859-15* 2422 (the fixnum (- code #x150)))) 2423 ((= code #x20ac) #xa4)))) 2424 (declare (type (mod #x110000) code)) 2425 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 2426 (incf idx))))) 2427 :memory-decode-function 2428 (nfunction 2429 iso-8859-15-memory-decode 2430 (lambda (pointer noctets idx string) 2431 (do* ((i 0 (1+ i)) 2432 (index idx (1+ index))) 2433 ((>= i noctets) index) 2434 (let* ((1st-unit (%get-unsigned-byte pointer index))) 2435 (declare (type (unsigned-byte 8) 1st-unit)) 2436 (setf (schar string i) 2437 (if (< 1st-unit #xa0) 2438 (code-char 1st-unit) 2439 (svref *iso-8859-15-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2440 :octets-in-string-function 2441 #'8-bit-fixed-width-octets-in-string 2442 :length-of-vector-encoding-function 2443 #'8-bit-fixed-width-length-of-vector-encoding 2444 :length-of-memory-encoding-function 2445 #'8-bit-fixed-width-length-of-memory-encoding 2446 :literal-char-code-limit #xa0 2447 ) 2448 2449 (defparameter *iso-8859-16-to-unicode* 2450 #( 2451 ;; #xa0 2452 #\u+00a0 #\u+0104 #\u+0105 #\u+0141 #\u+20ac #\u+201e #\u+0160 #\u+00a7 2453 #\u+0161 #\u+00a9 #\u+0218 #\u+00ab #\u+0179 #\u+00ad #\u+017a #\u+017b 2454 ;; #xb0 2455 #\u+00b0 #\u+00b1 #\u+010c #\u+0142 #\u+017d #\u+201d #\u+00b6 #\u+00b7 2456 #\u+017e #\u+010d #\u+0219 #\u+00bb #\u+0152 #\u+0153 #\u+0178 #\u+017c 2457 ;; #xc0 2458 #\u+00c0 #\u+00c1 #\u+00c2 #\u+0102 #\u+00c4 #\u+0106 #\u+00c6 #\u+00c7 2459 #\u+00c8 #\u+00c9 #\u+00ca #\u+00cb #\u+00cc #\u+00cd #\u+00ce #\u+00cf 2460 ;; #xd0 2461 #\u+0110 #\u+0143 #\u+00d2 #\u+00d3 #\u+00d4 #\u+0150 #\u+00d6 #\u+015a 2462 #\u+0170 #\u+00d9 #\u+00da #\u+00db #\u+00dc #\u+0118 #\u+021a #\u+00df 2463 ;; #xe0 2464 #\u+00e0 #\u+00e1 #\u+00e2 #\u+0103 #\u+00e4 #\u+0107 #\u+00e6 #\u+00e7 2465 #\u+00e8 #\u+00e9 #\u+00ea #\u+00eb #\u+00ec #\u+00ed #\u+00ee #\u+00ef 2466 ;; #xf0 2467 #\u+0111 #\u+0144 #\u+00f2 #\u+00f3 #\u+00f4 #\u+0151 #\u+00f6 #\u+015b 2468 #\u+0171 #\u+00f9 #\u+00fa #\u+00fb #\u+00fc #\u+0119 #\u+021b #\u+00ff 2469 )) 2470 2471 (defparameter *unicode-a0-180-to-iso-8859-16* 2472 #( 2473 #xa0 nil nil nil nil nil nil #xa7 ; #xa0-#xa7 2474 nil #xa9 nil #xab nil #xad nil nil ; #xa8-#xaf 2475 #xb0 #xb1 nil nil nil nil #xb6 #xb7 ; #xb0-#xb7 2476 nil nil nil #xbb nil nil nil nil ; #xb8-#xbf 2477 #xc0 #xc1 #xc2 nil #xc4 nil #xc6 #xc7 ; #xc0-#xc7 2478 #xc8 #xc9 #xca #xcb #xcc #xcd #xce #xcf ; #xc8-#xcf 2479 nil nil #xd2 #xd3 #xd4 nil #xd6 nil ; #xd0-#xd7 2480 nil #xd9 #xda #xdb #xdc nil nil #xdf ; #xd8-#xdf 2481 #xe0 #xe1 #xe2 nil #xe4 nil #xe6 #xe7 ; #xe0-#xe7 2482 #xe8 #xe9 #xea #xeb #xec #xed #xee #xef ; #xe8-#xef 2483 nil nil #xf2 #xf3 #xf4 nil #xf6 nil ; #xf0-#xf7 2484 nil #xf9 #xfa #xfb #xfc nil nil #xff ; #xf8-#xff 2485 nil nil #xc3 #xe3 #xa1 #xa2 #xc5 #xe5 ; #x100-#x107 2486 nil nil nil nil #xb2 #xb9 nil nil ; #x108-#x10f 2487 #xd0 #xf0 nil nil nil nil nil nil ; #x110-#x117 2488 #xdd #xfd nil nil nil nil nil nil ; #x118-#x11f 2489 nil nil nil nil nil nil nil nil ; #x120-#x127 2490 nil nil nil nil nil nil nil nil ; #x128-#x12f 2491 nil nil nil nil nil nil nil nil ; #x130-#x137 2492 nil nil nil nil nil nil nil nil ; #x138-#x13f 2493 nil #xa3 #xb3 #xd1 #xf1 nil nil nil ; #x140-#x147 2494 nil nil nil nil nil nil nil nil ; #x148-#x14f 2495 #xd5 #xf5 #xbc #xbd nil nil nil nil ; #x150-#x157 2496 nil nil #xd7 #xf7 nil nil nil nil ; #x158-#x15f 2497 #xa6 #xa8 nil nil nil nil nil nil ; #x160-#x167 2498 nil nil nil nil nil nil nil nil ; #x168-#x16f 2499 #xd8 #xf8 nil nil nil nil nil nil ; #x170-#x177 2500 #xbe #xac #xae #xaf #xbf #xb4 #xb8 nil ; #x178-#x17f 2501 )) 2502 2503 (defparameter *unicode-218-220-to-iso-8859-16* 2504 #( 2505 #xaa #xba #xde #xfe nil nil nil nil ; #x218-#x21f 2506 )) 2507 2508 (defparameter *unicode-2018-2020-to-iso-8859-16* 2509 #( 2510 nil nil nil nil nil #xb5 #xa5 nil ; #x2018-#x201f 2511 )) 2512 2513 2514 (define-character-encoding :iso-8859-16 2515 "An 8-bit, fixed-width character encoding in which codes #x00-#x9f 2516 map to their Unicode equivalents and other codes map to other Unicode 2517 character values. Intended to provide most characters found in Southeast 2518 European languages." 2519 :aliases '(:iso_8859-16 :latin10 :l1 :iso-ir-226) 2520 :stream-encode-function 2521 (nfunction 2522 iso-8859-16-stream-encode 2523 (lambda (char write-function stream) 2524 (let* ((code (char-code char)) 2525 (c2 (cond ((< code #xa0) code) 2526 ((< code #x180) 2527 (svref *unicode-a0-180-to-iso-8859-16* 2528 (the fixnum (- code #xa0)))) 2529 ((and (>= code #x218) (< code #x220)) 2530 (svref *unicode-218-220-to-iso-8859-16* 2531 (the fixnum (- code #x218)))) 2532 ((and (>= code #x2018) (< code #x2020)) 2533 (svref *unicode-2018-2020-to-iso-8859-16* 2534 (the fixnum (- code #x2018)))) 2535 ((= code #x20ac) #xa4)))) 2536 (declare (type (mod #x110000) code)) 2537 (funcall write-function stream (or c2 (char-code #\Sub))) 2538 1))) 2539 :stream-decode-function 2540 (nfunction 2541 iso-8859-16-stream-decode 2542 (lambda (1st-unit next-unit-function stream) 2543 (declare (ignore next-unit-function stream) 2544 (type (unsigned-byte 8) 1st-unit)) 2545 (if (< 1st-unit #xa0) 2546 (code-char 1st-unit) 2547 (svref *iso-8859-16-to-unicode* (the fixnum (- 1st-unit #xa0)))))) 2548 :vector-encode-function 2549 (nfunction 2550 iso-8859-16-vector-encode 2551 (lambda (string vector idx start end) 2552 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2553 (fixnum idx)) 2554 (do* ((i start (1+ i))) 2555 ((>= i end) idx) 2556 (let* ((char (schar string i)) 2557 (code (char-code char)) 2558 (c2 (cond ((< code #xa0) code) 2559 ((< code #x180) 2560 (svref *unicode-a0-180-to-iso-8859-16* 2561 (the fixnum (- code #xa0)))) 2562 ((and (>= code #x218) (< code #x220)) 2563 (svref *unicode-218-220-to-iso-8859-16* 2564 (the fixnum (- code #x218)))) 2565 ((and (>= code #x2018) (< code #x2020)) 2566 (svref *unicode-2018-2020-to-iso-8859-16* 2567 (the fixnum (- code #x2018)))) 2568 ((= code #x20ac) #xa4)))) 2569 (declare (type (mod #x110000) code)) 2570 (setf (aref vector idx) (or c2 (char-code #\Sub))) 2571 (incf idx))))) 2572 :vector-decode-function 2573 (nfunction 2574 iso-8859-16-vector-decode 2575 (lambda (vector idx noctets string) 2576 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 2577 (do* ((i 0 (1+ i)) 2578 (index idx (1+ index))) 2579 ((>= i noctets) index) 2580 (let* ((1st-unit (aref vector index))) 2581 (declare (type (unsigned-byte 8) 1st-unit)) 2582 (setf (schar string i) 2583 (if (< 1st-unit #xa0) 2584 (code-char 1st-unit) 2585 (svref *iso-8859-16-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2586 :memory-encode-function 2587 (nfunction 2588 iso-8859-16-memory-encode 2589 (lambda (string pointer idx start end) 2590 (do* ((i start (1+ i))) 2591 ((>= i end) idx) 2592 (let* ((code (char-code (schar string i))) 2593 (c2 (cond ((< code #xa0) code) 2594 ((< code #x180) 2595 (svref *unicode-a0-180-to-iso-8859-16* 2596 (the fixnum (- code #xa0)))) 2597 ((and (>= code #x218) (< code #x220)) 2598 (svref *unicode-218-220-to-iso-8859-16* 2599 (the fixnum (- code #x218)))) 2600 ((and (>= code #x2018) (< code #x2020)) 2601 (svref *unicode-2018-2020-to-iso-8859-16* 2602 (the fixnum (- code #x2018)))) 2603 ((= code #x20ac) #xa4)))) 2604 (declare (type (mod #x110000) code)) 2605 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 2606 (incf idx))))) 2607 :memory-decode-function 2608 (nfunction 2609 iso-8859-16-memory-decode 2610 (lambda (pointer noctets idx string) 2611 (do* ((i 0 (1+ i)) 2612 (index idx (1+ index))) 2613 ((>= i noctets) index) 2614 (let* ((1st-unit (%get-unsigned-byte pointer index))) 2615 (declare (type (unsigned-byte 8) 1st-unit)) 2616 (setf (schar string i) 2617 (if (< 1st-unit #xa0) 2618 (code-char 1st-unit) 2619 (svref *iso-8859-16-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 2620 :octets-in-string-function 2621 #'8-bit-fixed-width-octets-in-string 2622 :length-of-vector-encoding-function 2623 #'8-bit-fixed-width-length-of-vector-encoding 2624 :length-of-memory-encoding-function 2625 #'8-bit-fixed-width-length-of-memory-encoding 2626 :literal-char-code-limit #xa0 2627 ) 2628 869 2629 870 2630 ;;; UTF-8. Decoding checks for malformed sequences; it might be faster (and
Note:
See TracChangeset
for help on using the changeset viewer.
