Changeset 5335
- Timestamp:
- Oct 12, 2006, 6:45:47 PM (18 years ago)
- Location:
- trunk/ccl/level-1
- Files:
-
- 3 edited
-
l1-streams.lisp (modified) (11 diffs)
-
l1-sysio.lisp (modified) (12 diffs)
-
l1-unicode.lisp (modified) (49 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/ccl/level-1/l1-streams.lisp
r5333 r5335 1273 1273 1274 1274 1275 1276 1277 1278 1279 (defun u8-translate-cr-to-lf (vector n)1280 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1281 (type index n)1282 (optimize (speed 3) (safety 0)))1283 (dotimes (i n t)1284 (if (= (the (unsigned-byte 8) (aref vector i)) (char-code #\Return))1285 (setf (aref vector i) (char-code #\Linefeed)))))1286 1287 (defun u8-translate-lf-to-cr (vector n)1288 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1289 (type index n)1290 (optimize (speed 3) (safety 0)))1291 (dotimes (i n t)1292 (if (= (the (unsigned-byte 8) (aref vector i)) (char-code #\Linefeed))1293 (setf (aref vector i) (char-code #\Return)))))1294 1295 1296 (defun big-endian-u16-translate-cr-to-lf (vector n)1297 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1298 (type index n)1299 (optimize (speed 3) (safety 0)))1300 (do* ((i 0 (+ i 2))1301 (j 1 (+ j 2)))1302 ((>= i n) (= i n))1303 (declare (type index i j))1304 (if (and (= 0 (the (unsigned-byte 8) (aref vector i)))1305 (= (the (unsigned-byte 8) (aref vector j)) (char-code #\Return)))1306 (setf (aref vector j) (char-code #\Linefeed)))))1307 1308 (defun big-endian-u16-translate-lf-to-cr (vector n)1309 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1310 (type index n)1311 (optimize (speed 3) (safety 0)))1312 (do* ((i 0 (+ i 2))1313 (j 1 (+ j 2)))1314 ((>= i n) (= i n))1315 (declare (type index i j))1316 (if (and (= 0 (the (unsigned-byte 8) (aref vector i)))1317 (= (the (unsigned-byte 8) (aref vector j)) (char-code #\Linefeed)))1318 (setf (aref vector j) (char-code #\Return)))))1319 1320 (defun big-endian-u32-translate-cr-to-lf (vector n)1321 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1322 (type index n)1323 (optimize (speed 3) (safety 0)))1324 (do* ((w 0 (+ w 4))1325 (x 1 (+ x 4))1326 (y 2 (+ y 4))1327 (z 3 (+ z 4)))1328 ((>= w n) (= w n))1329 (declare (type index w x y z))1330 (if (and (= 0 (the (unsigned-byte 8) (aref vector w)))1331 (= 0 (the (unsigned-byte 8) (aref vector x)))1332 (= 0 (the (unsigned-byte 8) (aref vector y)))1333 (= (the (unsigned-byte 8) (aref vector z)) (char-code #\Return)))1334 (setf (aref vector z) (char-code #\Linefeed)))))1335 1336 (defun big-endian-u32-translate-lf-to-cr (vector n)1337 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1338 (type index n)1339 (optimize (speed 3) (safety 0)))1340 (do* ((w 0 (+ w 4))1341 (x 1 (+ x 4))1342 (y 2 (+ y 4))1343 (z 3 (+ z 4)))1344 ((>= w n) (= w n))1345 (declare (type index w x y z))1346 (if (and (= 0 (the (unsigned-byte 8) (aref vector w)))1347 (= 0 (the (unsigned-byte 8) (aref vector x)))1348 (= 0 (the (unsigned-byte 8) (aref vector y)))1349 (= (the (unsigned-byte 8) (aref vector z)) (char-code #\Linefeed)))1350 (setf (aref vector z) (char-code #\Return)))))1351 1352 1353 (defun little-endian-u16-translate-cr-to-lf (vector n)1354 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1355 (type index n)1356 (optimize (speed 3) (safety 0)))1357 (do* ((i 0 (+ i 2))1358 (j 1 (+ j 2)))1359 ((>= i n) (= i n))1360 (declare (type index i j))1361 (if (and (= 0 (the (unsigned-byte 8) (aref vector j)))1362 (= (the (unsigned-byte 8) (aref vector i)) (char-code #\Return)))1363 (setf (aref vector i) (char-code #\Linefeed)))))1364 1365 1366 (defun little-endian-u16-translate-lf-to-cr (vector n)1367 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1368 (type index n)1369 (optimize (speed 3) (safety 0)))1370 (do* ((i 0 (+ i 2))1371 (j 1 (+ j 2)))1372 ((>= i n) (= i n))1373 (declare (type index i j))1374 (if (and (= 0 (the (unsigned-byte 8) (aref vector j)))1375 (= (the (unsigned-byte 8) (aref vector i)) (char-code #\Linefeed)))1376 (setf (aref vector i) (char-code #\Return)))))1377 1378 (defun little-endian-u32-translate-cr-to-lf (vector n)1379 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1380 (type index n)1381 (optimize (speed 3) (safety 0)))1382 (do* ((w 0 (+ w 4))1383 (x 1 (+ x 4))1384 (y 2 (+ y 4))1385 (z 3 (+ z 4)))1386 ((>= w n) (= w n))1387 (declare (type index w x y z))1388 (if (and (= (the (unsigned-byte 8) (aref vector w)) (char-code #\Return))1389 (= 0 (the (unsigned-byte 8) (aref vector x)))1390 (= 0 (the (unsigned-byte 8) (aref vector y)))1391 (= 0 (the (unsigned-byte 8) (aref vector z))))1392 (setf (aref vector 2) (char-code #\Linefeed)))))1393 1394 (defun little-endian-32-translate-lf-to-cr (vector n)1395 (declare (type (simple-array (unsigned-byte 8) (*)) vector)1396 (type index n)1397 (optimize (speed 3) (safety 0)))1398 (do* ((w 0 (+ w 4))1399 (x 1 (+ x 4))1400 (y 2 (+ y 4))1401 (z 3 (+ z 4)))1402 ((>= w n) (= w n))1403 (declare (type index w x y z))1404 (if (and (= (the (unsigned-byte 8) (aref vector w)) (char-code #\Linefeed))1405 (= 0 (the (unsigned-byte 8) (aref vector x)))1406 (= 0 (the (unsigned-byte 8) (aref vector y)))1407 (= 0 (the (unsigned-byte 8) (aref vector z))))1408 (setf (aref vector 2) (char-code #\Return)))))1409 1410 1275 (declaim (inline %ioblock-force-output)) 1411 1276 … … 1455 1320 1456 1321 (defun %ioblock-unencoded-write-simple-string (ioblock string start-char num-chars) 1457 (declare (fixnum start- octet num-octets) (simple-string string))1322 (declare (fixnum start-char num-chars) (simple-string string)) 1458 1323 (let* ((written 0) 1459 1324 (col (ioblock-charpos ioblock)) … … 1484 1349 ((= j written)) 1485 1350 (declare (fixnum p i j)) 1486 (let* ((ch (schar string p))) 1351 (let* ((ch (schar string p)) 1352 (code (char-code ch))) 1353 (declare (type (mod #x110000) code)) 1487 1354 (if (eql ch #\newline) 1488 1355 (setq col 0) 1489 1356 (incf col)) 1490 (setf (aref buffer i) ( %char-code ch))))1357 (setf (aref buffer i) (if (>= code 256) (char-code #\Sub) code)))) 1491 1358 (setf (ioblock-dirty ioblock) t) 1492 1359 (incf index written) … … 1890 1757 (limit (ioblock-literal-char-code-limit ioblock)) 1891 1758 (encode-function (ioblock-encode-output-function ioblock)) 1892 (wcf (ioblock-write-char-when-locked-function ioblock))1893 1759 (start-char start-char (1+ start-char))) 1894 1760 ((= i num-chars) (setf (ioblock-charpos ioblock) col) num-chars) … … 1897 1763 (code (char-code char))) 1898 1764 (declare (type (mod #x110000) code)) 1899 (cond ((eq char #\newline) 1900 (setq col 0) 1901 (funcall wcf ioblock char)) 1902 (t 1903 (incf col) 1904 (if (< code limit) 1905 (%ioblock-write-u16-code-unit ioblock code) 1906 (funcall encode-function char #'%ioblock-write-u16-code-unit ioblock))))))) 1765 (if (eq char #\newline) 1766 (setq col 0) 1767 (incf col)) 1768 (if (< code limit) 1769 (%ioblock-write-u16-code-unit ioblock code) 1770 (funcall encode-function char #'%ioblock-write-u16-code-unit ioblock))))) 1907 1771 1908 1772 (declaim (inline %ioblock-write-swapped-u16-encoded-char)) … … 2152 2016 2153 2017 (defun %ioblock-unencoded-character-read-vector (ioblock vector start end) 2154 (let* ((in (ioblock-inbuf ioblock))) 2155 (if (io-buffer-translate in) 2156 (%ioblock-encoded-character-read-vector ioblock vector start end) 2157 (do* ((i start) 2158 (inbuf (io-buffer-buffer in)) 2159 (need (- end start))) 2160 ((= i end) end) 2161 (declare (fixnum i need)) 2162 (let* ((ch (%ioblock-tyi ioblock))) 2163 (if (eq ch :eof) 2164 (return i)) 2165 (setf (schar vector i) ch) 2166 (incf i) 2167 (decf need) 2168 (let* ((idx (io-buffer-idx in)) 2169 (count (io-buffer-count in)) 2170 (avail (- count idx))) 2171 (declare (fixnum idx count avail)) 2172 (unless (zerop avail) 2173 (if (> avail need) 2174 (setq avail need)) 2175 (%copy-u8-to-string inbuf idx vector i avail) 2176 (setf (io-buffer-idx in) (+ idx avail)) 2177 (incf i avail) 2178 (decf need avail)))))))) 2179 2018 (do* ((i start) 2019 (in (ioblock-inbuf ioblock)) 2020 (inbuf (io-buffer-buffer in)) 2021 (need (- end start))) 2022 ((= i end) end) 2023 (declare (fixnum i need)) 2024 (let* ((ch (%ioblock-tyi ioblock))) 2025 (if (eq ch :eof) 2026 (return i)) 2027 (setf (schar vector i) ch) 2028 (incf i) 2029 (decf need) 2030 (let* ((idx (io-buffer-idx in)) 2031 (count (io-buffer-count in)) 2032 (avail (- count idx))) 2033 (declare (fixnum idx count avail)) 2034 (unless (zerop avail) 2035 (if (> avail need) 2036 (setq avail need)) 2037 (%copy-u8-to-string inbuf idx vector i avail) 2038 (setf (io-buffer-idx in) (+ idx avail)) 2039 (incf i avail) 2040 (decf need avail)))))) 2041 2042 ;;; Also used when newline translation complicates things. 2180 2043 (defun %ioblock-encoded-character-read-vector (ioblock vector start end) 2181 2044 (declare (fixnum start end)) … … 2446 2309 (with-ioblock-input-lock-grabbed (ioblock) 2447 2310 (%ioblock-write-char-translating-newline-to-line-separator ioblock char))) 2311 2312 ;;; If we do newline translation, we probably can't be too clever about reading/writing 2313 ;;; strings. 2314 (defun %ioblock-write-simple-string-with-newline-translation (ioblock string start-pos num-chars) 2315 (declare (fixnum start-char num-chars) (simple-string string)) 2316 (let* ((col (ioblock-charpos ioblock)) 2317 (wcf (ioblock-write-char-when-locked-function ioblock))) 2318 (declare (fixnum col)) 2319 (do* ((i start-pos (1+ i)) 2320 (n 0 (1+ n))) 2321 ((= n num-chars) (setf (ioblock-charpos ioblock) col) num-chars) 2322 (let* ((char (schar string i))) 2323 (if (eql char #\Newline) 2324 (setq col 0) 2325 (incf col)) 2326 (funcall wcf ioblock char))))) 2327 2448 2328 2449 2329 … … 2501 2381 '%ioblock-unencoded-read-line))) 2502 2382 (when line-termination 2503 (setf (ioblock-read-char-without-translation-when-locked-function ioblock) 2504 (ioblock-read-char-when-locked-function ioblock)) 2505 (ecase line-termination 2506 (:cr (setf (ioblock-read-char-when-locked-function ioblock) 2507 '%ioblock-read-char-translating-cr-to-newline 2508 (ioblock-read-char-function ioblock) 2509 (case sharing 2510 (:private 2511 '%private-ioblock-read-char-translating-cr-to-newline) 2512 (:lock 2513 '%locked-ioblock-read-char-translating-cr-to-newline) 2514 (t '%ioblock-read-char-translating-cr-to-newline)))) 2515 (:crlf (setf (ioblock-read-char-when-locked-function ioblock) 2516 '%ioblock-read-char-translating-crlf-to-newline 2517 (ioblock-read-char-function ioblock) 2518 (case sharing 2519 (:private 2520 '%private-ioblock-read-char-translating-crlf-to-newline) 2521 (:lock 2522 '%locked-ioblock-read-char-translating-crlf-to-newline) 2523 (t '%ioblock-read-char-translating-crlf-to-newline)))) 2524 (:unicode (setf (ioblock-read-char-when-locked-function ioblock) 2525 '%ioblock-read-char-translating-line-separator-to-newline 2526 (ioblock-read-char-function ioblock) 2527 (case sharing 2528 (:private 2529 '%private-ioblock-read-char-translating-line-separator-to-newline) 2530 (:lock 2531 '%locked-ioblock-read-char-translating-line-separator-to-newline) 2532 (t '%ioblock-read-char-translating-line-separator-to-newline))))))) 2383 (install-ioblock-input-line-termination ioblock line-termination)) 2384 ) 2533 2385 2534 2386 (unless (or (eq element-type 'character) … … 2611 2463 '%general-ioblock-read-byte)))))) 2612 2464 2465 (defun install-ioblock-input-line-termination (ioblock line-termination) 2466 (when line-termination 2467 (let* ((sharing (ioblock-sharing ioblock))) 2468 (setf (ioblock-read-char-without-translation-when-locked-function ioblock) 2469 (ioblock-read-char-when-locked-function ioblock) 2470 (ioblock-character-read-vector-function ioblock) 2471 '%ioblock-encoded-character-read-vector 2472 (ioblock-read-line-function ioblock) '%ioblock-encoded-read-line) 2473 (ecase line-termination 2474 (:cr (setf (ioblock-read-char-when-locked-function ioblock) 2475 '%ioblock-read-char-translating-cr-to-newline 2476 (ioblock-read-char-function ioblock) 2477 (case sharing 2478 (:private 2479 '%private-ioblock-read-char-translating-cr-to-newline) 2480 (:lock 2481 '%locked-ioblock-read-char-translating-cr-to-newline) 2482 (t '%ioblock-read-char-translating-cr-to-newline)))) 2483 (:crlf (setf (ioblock-read-char-when-locked-function ioblock) 2484 '%ioblock-read-char-translating-crlf-to-newline 2485 (ioblock-read-char-function ioblock) 2486 (case sharing 2487 (:private 2488 '%private-ioblock-read-char-translating-crlf-to-newline) 2489 (:lock 2490 '%locked-ioblock-read-char-translating-crlf-to-newline) 2491 (t '%ioblock-read-char-translating-crlf-to-newline)))) 2492 (:unicode (setf (ioblock-read-char-when-locked-function ioblock) 2493 '%ioblock-read-char-translating-line-separator-to-newline 2494 (ioblock-read-char-function ioblock) 2495 (case sharing 2496 (:private 2497 '%private-ioblock-read-char-translating-line-separator-to-newline) 2498 (:lock 2499 '%locked-ioblock-read-char-translating-line-separator-to-newline) 2500 (t '%ioblock-read-char-translating-line-separator-to-newline)))))))) 2501 2613 2502 (defun setup-ioblock-output (ioblock character-p element-type sharing encoding line-termination) 2614 2503 (or (ioblock-sharing ioblock) … … 2664 2553 (t '%ioblock-write-char))))) 2665 2554 (when line-termination 2666 (setf (ioblock-write-char-without-translation-when-locked-function ioblock) 2667 (ioblock-write-char-when-locked-function ioblock)) 2668 (ecase line-termination 2669 (:cr (setf (ioblock-write-char-when-locked-function ioblock) 2670 '%ioblock-write-char-translating-newline-to-cr 2671 (ioblock-read-char-function ioblock) 2672 (case sharing 2673 (:private 2674 '%private-ioblock-write-char-translating-newline-to-cr) 2675 (:lock 2676 '%locked-ioblock-write-char-translating-newline-to-cr) 2677 (t '%ioblock-write-char-translating-newline-to-cr)))) 2678 (:crlf (setf (ioblock-write-char-when-locked-function ioblock) 2679 '%ioblock-write-char-translating-newline-to-crlf 2680 (ioblock-write-char-function ioblock) 2681 (case sharing 2682 (:private 2683 '%private-ioblock-write-char-translating-newline-to-crlf) 2684 (:lock 2685 '%locked-ioblock-write-char-translating-newline-to-crlf) 2686 (t '%ioblock-write-char-translating-newline-to-crlf)))) 2687 (:unicode (setf (ioblock-write-char-when-locked-function ioblock) 2688 '%ioblock-write-char-translating-newline-to-line-separator 2689 (ioblock-write-char-function ioblock) 2690 (case sharing 2691 (:private 2692 '%private-ioblock-write-char-translating-newline-to-line-separator) 2693 (:lock 2694 '%locked-ioblock-write-char-translating-newline-to-line-separator) 2695 (t '%ioblock-write-char-translating-newline-to-line-separator))))))) 2555 (install-ioblock-output-line-termination ioblock line-termination))) 2696 2556 (unless (or (eq element-type 'character) 2697 2557 (subtypep element-type 'character)) … … 2771 2631 '%general-ioblock-write-byte) 2772 2632 '%general-ioblock-write-byte)))))) 2633 2634 (defun install-ioblock-output-line-termination (ioblock line-termination) 2635 (let* ((sharing (ioblock-sharing ioblock))) 2636 (when line-termination 2637 (setf (ioblock-write-char-without-translation-when-locked-function ioblock) 2638 (ioblock-write-char-when-locked-function ioblock) 2639 (ioblock-write-simple-string-function ioblock) 2640 '%ioblock-write-simple-string-with-newline-translation) 2641 (ecase line-termination 2642 (:cr (setf (ioblock-write-char-when-locked-function ioblock) 2643 '%ioblock-write-char-translating-newline-to-cr 2644 (ioblock-read-char-function ioblock) 2645 (case sharing 2646 (:private 2647 '%private-ioblock-write-char-translating-newline-to-cr) 2648 (:lock 2649 '%locked-ioblock-write-char-translating-newline-to-cr) 2650 (t '%ioblock-write-char-translating-newline-to-cr)))) 2651 (:crlf (setf (ioblock-write-char-when-locked-function ioblock) 2652 '%ioblock-write-char-translating-newline-to-crlf 2653 (ioblock-write-char-function ioblock) 2654 (case sharing 2655 (:private 2656 '%private-ioblock-write-char-translating-newline-to-crlf) 2657 (:lock 2658 '%locked-ioblock-write-char-translating-newline-to-crlf) 2659 (t '%ioblock-write-char-translating-newline-to-crlf)))) 2660 (:unicode (setf (ioblock-write-char-when-locked-function ioblock) 2661 '%ioblock-write-char-translating-newline-to-line-separator 2662 (ioblock-write-char-function ioblock) 2663 (case sharing 2664 (:private 2665 '%private-ioblock-write-char-translating-newline-to-line-separator) 2666 (:lock 2667 '%locked-ioblock-write-char-translating-newline-to-line-separator) 2668 (t '%ioblock-write-char-translating-newline-to-line-separator)))))))) 2773 2669 2774 2670 (defun buffer-element-type-for-character-encoding (encoding) -
trunk/ccl/level-1/l1-sysio.lisp
r5320 r5335 20 20 (octet-pos 0 :type fixnum) ; current io position in octets 21 21 (fileeof 0 :type fixnum) ; file length in elements 22 (input-filter nil) 23 (output-filter nil) 24 (line-termination :unix)) 25 26 27 (defun install-line-termination-filters (file-ioblock line-termination in-p out-p) 28 (let* ((inferred-macos nil)) 29 (if (eq line-termination :inferred) 30 (if in-p 31 (if (eq (setq line-termination (infer-line-termination file-ioblock)) 32 :macos) 33 (setq inferred-macos t)) 34 (setq line-termination :unix))) 35 (setf (file-ioblock-line-termination file-ioblock) line-termination) 36 (when (eq line-termination :macos) 37 (let* ((encoding (or (file-ioblock-encoding file-ioblock) 38 (get-character-encoding nil))) 39 (element-size (character-encoding-code-unit-size encoding)) 40 (native-byte-order (ioblock-native-byte-order file-ioblock))) 41 (when in-p 42 (setf (file-ioblock-input-filter file-ioblock) 43 (case element-size 44 (8 'u8-translate-cr-to-lf) 45 (16 (if #+big-endian-target native-byte-order 46 #+little-endian-target (not native-byte-order) 47 'big-endian-u16-translate-cr-to-lf 48 'little-endian-swapped-u16-translate-cr-to-lf)) 49 (32 (if #+big-endian-target native-byte-order 50 #+little-endian-target (not native-byte-order) 51 'big-endian-u32-translate-cr-to-lf 52 'little-endian-swapped-u32-translate-cr-to-lf)))) 53 (if inferred-macos 54 (let* ((inbuf (file-ioblock-inbuf file-ioblock))) 55 (funcall (file-ioblock-input-filter file-ioblock) 56 (io-buffer-buffer inbuf) 57 (io-buffer-count inbuf))))) 58 (when out-p 59 (setf (file-ioblock-output-filter file-ioblock) 60 (case element-size 61 (8 'u8-translate-lf-to-cr) 62 (16 (if native-byte-order 63 'u16-translate-lf-to-cr 64 'swapped-u16-translate-lf-to-cr)) 65 (32 (if native-byte-order 66 'u32-translate-lf-to-cr 67 'swapped-u32-translate-lf-to-cr))))) 68 line-termination)))) 22 ) 23 24 25 69 26 70 27 ;;; The file-ioblock-octet-pos field is the (octet) position … … 82 39 (file-octet-filepos file-ioblock))) 83 40 84 (defun translate-cr-to-lf (file-ioblock)85 (let* ((inbuf (file-ioblock-inbuf file-ioblock))86 (string (io-buffer-buffer inbuf))87 (n (io-buffer-count inbuf)))88 (declare (simple-base-string string)89 (fixnum n))90 (dotimes (i n n)91 (if (eq (schar string i) #\Return)92 (setf (schar string i) #\Linefeed)))))93 94 (defun translate-lf-to-cr (file-ioblock n)95 (declare (fixnum n))96 (let* ((outbuf (file-ioblock-outbuf file-ioblock))97 (string (io-buffer-buffer outbuf)))98 (declare (simple-base-string string))99 (dotimes (i n n)100 (if (eq (schar string i) #\Linefeed)101 (setf (schar string i) #\Return)))))102 103 41 (defun infer-line-termination (file-ioblock) 104 42 (let* ((encoding (or (file-ioblock-encoding file-ioblock) 105 43 (get-character-encoding nil))) 106 (unit-size (character-encoding-code-unit-size encoding))107 (cr (char-code #\Return))108 (lf (char-code #\linefeed))109 44 (inbuf (file-ioblock-inbuf file-ioblock)) 110 45 (buffer (io-buffer-buffer inbuf)) 111 46 (n (io-buffer-count inbuf))) 112 (cond ((= unit-size 8) 113 (if (zerop n) 114 (setq n (fd-stream-advance (file-ioblock-stream file-ioblock) 115 file-ioblock 116 t))) 117 118 119 (do* ((i 0 (+ i 1)) 120 (code)) 121 ((= i n) :unix) 122 (setq code (aref buffer i)) 123 (if (= code cr) 124 (return :macos) 125 (if (= code lf) 126 (return :unix)))))))) 127 128 129 (defvar *known-line-termination-formats* '(:unix :macos :inferred)) 47 (when (zerop n) 48 (setq n (or (fd-stream-advance (file-ioblock-stream file-ioblock) 49 file-ioblock 50 t) 51 0))) 52 (multiple-value-bind (nchars last) 53 (funcall (character-encoding-length-of-vector-encoding-function encoding) 54 buffer 55 0 56 n) 57 (declare (fixnum nchars last)) 58 (let* ((string (make-string nchars))) 59 (declare (dynamic-extent string)) 60 (decode-character-encoded-vector encoding buffer 0 last string) 61 (let* ((line-termination 62 (do* ((i 0 (1+ i)) 63 (last-was-cr nil)) 64 ((= nchars) (if last-was-cr :cr)) 65 (declare (fixnum i)) 66 (let* ((char (schar string i))) 67 (if last-was-cr 68 (if (eq char #\Linefeed) 69 (return :crlf) 70 (return :cr)) 71 (case char 72 (#\Newline (return nil)) 73 (#\Line_Separator (return :unicode)) 74 (#\Return (setq last-was-cr t)))))))) 75 (when line-termination 76 (install-ioblock-input-line-termination file-ioblock line-termination) 77 (when (file-ioblock-outbuf file-ioblock) 78 (install-ioblock-output-line-termination file-ioblock line-termination)))))))) 79 80 130 81 131 82 (defvar *default-external-format* :unix) … … 139 90 "The value of this variable is used when :EXTERNAL-FORMAT is 140 91 unspecified or specified as :DEFAULT. It can meaningfully be given any 141 of the values :UNIX, :MACOS, :MSDOS or :INFERRED, each of which is92 of the values :UNIX, :MACOS, :MSDOS, :UNICODE or :INFERRED, each of which is 142 93 interpreted as described in the documentation. 143 94 … … 179 130 ((lookup-character-encoding external-format) 180 131 (normalize-external-format domain `(:character-encoding ,external-format))) 181 (( member external-format *known-line-termination-formats*)132 ((assq external-format *canonical-line-termination-conventions*) 182 133 (normalize-external-format domain `(:line-termination ,external-format))) 183 134 (t … … 188 139 189 140 190 (defun file-stream-force-output (stream ioblock count finish-p) 191 (let* ((filter (file-ioblock-output-filter ioblock))) 192 (when filter 193 (let* ((buffer (io-buffer-buffer (file-ioblock-outbuf ioblock)))) 194 (funcall filter buffer count))) 195 (fd-stream-force-output stream ioblock count finish-p))) 141 196 142 197 143 ;;; Establish a new position for the specified file-stream. … … 255 201 (progn 256 202 (when (file-ioblock-dirty file-ioblock) 257 (f ile-stream-force-output (file-ioblock-stream file-ioblock)258 file-ioblock259 (io-buffer-count outbuf)260 nil)203 (fd-stream-force-output (file-ioblock-stream file-ioblock) 204 file-ioblock 205 (io-buffer-count outbuf) 206 nil) 261 207 ;; May have just extended the file; may need to update 262 208 ;; fileeof. … … 309 255 (when (file-ioblock-dirty file-ioblock) 310 256 (file-ioblock-seek file-ioblock octet-base) 311 (f ile-stream-force-output (file-ioblock-stream file-ioblock)312 file-ioblock313 (io-buffer-count outbuf)314 nil))257 (fd-stream-force-output (file-ioblock-stream file-ioblock) 258 file-ioblock 259 (io-buffer-count outbuf) 260 nil)) 315 261 (file-ioblock-seek-and-reset file-ioblock 316 262 (ioblock-elements-to-octets … … 539 485 nil))) 540 486 541 ;;; Fill the input buffer, possibly doing newline translation. 542 (defun file-stream-advance (stream file-ioblock read-p) 543 (let* ((n (fd-stream-advance stream file-ioblock read-p)) 544 (filter (file-ioblock-input-filter file-ioblock))) 545 (when (and filter n (> n 0)) 546 (let* ((buf (file-ioblock-inbuf file-ioblock)) 547 (vector (io-buffer-buffer buf))) 548 (funcall filter vector n))) 549 n)) 487 550 488 551 489 ;;; If we've been reading, the file position where we're going … … 560 498 (break "Expected newpos to be ~d, fd is at ~d" newpos curpos)) 561 499 (setf (file-ioblock-octet-pos file-ioblock) newpos) 562 (f ile-stream-advance stream file-ioblock read-p)))500 (fd-stream-advance stream file-ioblock read-p))) 563 501 564 502 ;;; If the buffer's dirty, we have to back up and rewrite it before … … 571 509 (when (ioblock-dirty file-ioblock) 572 510 (file-ioblock-seek file-ioblock curpos) 573 (f ile-stream-force-output stream file-ioblock count nil))511 (fd-stream-force-output stream file-ioblock count nil)) 574 512 (unless (eql newpos (file-octet-filepos file-ioblock)) 575 513 (break "Expected newpos to be ~d, fd is at ~d" 576 514 newpos (file-octet-filepos file-ioblock))) 577 515 (setf (file-ioblock-octet-pos file-ioblock) newpos) 578 (f ile-stream-advance stream file-ioblock read-p)))516 (fd-stream-advance stream file-ioblock read-p))) 579 517 580 518 … … 585 523 (break "Expected newpos to be ~d, fd is at ~d" 586 524 curpos (file-octet-filepos file-ioblock))) 587 (let* ((n (f ile-stream-force-output stream file-ioblock count finish-p)))525 (let* ((n (fd-stream-force-output stream file-ioblock count finish-p))) 588 526 (incf (file-ioblock-octet-pos file-ioblock) (or n 0)) 589 527 n))) … … 877 815 (setf (file-ioblock-fileeof ioblock) 878 816 (ioblock-octets-to-elements ioblock (fd-size fd))) 879 (install-line-termination-filters ioblock line-termination in-p out-p) 817 (when (and in-p (eq line-termination :inferred)) 818 (infer-line-termination ioblock)) 880 819 (cond ((eq if-exists :append) 881 820 (file-position fstream :end)) -
trunk/ccl/level-1/l1-unicode.lisp
r5331 r5335 33 33 (setf (gethash name *character-encodings*) new)) 34 34 35 (defun ensure-character-encoding (thing) 36 (if (typep thing 'character-encoding) 37 thing 38 (or (lookup-character-encoding thing) 39 (error "~s is not a character-encoding or the name of a character-encoding." 40 thing)))) 41 42 35 43 (defstruct character-encoding 36 44 (name ()) ;canonical name … … 46 54 stream-decode-function ;(1ST-UNIT NEXT-UNIT STREAM) 47 55 48 ;; Returns NIL if the string can't be encoded, else sets 1 or 49 ;; more units in a vector argument and returns a value 1 greater 50 ;; than the index of the last unit written to the vector 51 vector-encode-function ;(STRING VECTOR INDEX &optional 52 ;(START 0) (END (length string))) 56 ;; Sets 1 or more units in a vector argument and returns a value 1 57 ;; greater than the index of the last octet written to the vector 58 vector-encode-function ;(STRING VECTOR INDEX START END) 53 59 54 ;; Returns the string and a value 1 greater than the last unit 55 ;; index consumed from the vector argument, or NIL and the 56 ;; argument index if the character can't be decoded. 57 vector-decode-function ;(VECTOR INDEX NUNITS STRING) 60 ;; Returns a value 1 greater than the last octet index consumed from 61 ;; the vector argument. 62 vector-decode-function ;(VECTOR INDEX NOCTETS STRING) 58 63 59 64 ;; Sets one or more units in memory at the address denoted by … … 61 66 ;; units written to memory), else returns NIL if any character 62 67 ;; can't be encoded. 63 memory-encode-function ;(STRING POINTER INDEX &optional64 ; (START 0) (END (length string))) 68 memory-encode-function ;(STRING POINTER INDEX START END) 69 65 70 66 71 ;; Returns (as multiple values) the string encoded in memory 67 72 ;; at the address denoted by the address and index args and the 68 ;; sum of the index arg and the number of units consumed, else 69 ;; NIL and the incoming index arg if the characters can't be 70 ;; encoded. (Note that the index args are and return value 71 ;; are "code unit indices", not "byte offsets".) 72 memory-decode-function ;(POINTER NUNITS INDEX STRING) 73 ;; sum of the index arg and the number of octets consumed. 74 memory-decode-function ;(POINTER NOCTETS INDEX STRING) 73 75 74 ;; Returns the number of units needed to encode STRING between START and END. 75 ;; Might return NIL if any character can't be encoded. 76 units-in-string-function ;(STRING &optional (START 0) (END (LENGTH STRING))) 77 ;; Might return NIL if the encoding's bogus 78 length-of-vector-encoding-function ;(VECTOR &optional (START 0) (END (LENGTH VECTOR))) 79 ;; Might return NIL if the encoding's bogus 80 length-of-memory-encoding-function ;(POINTER NUNITS &optional (START 0)) 76 ;; Returns the number of octets needed to encode STRING between START and END 77 octets-in-string-function ;(STRING START END) 78 79 ;; Returns the number of (full) characters encoded in VECTOR, and the index 80 ;; of the first octet not used to encode them. (The second value may be less than END). 81 length-of-vector-encoding-function ;(VECTOR START END) 82 83 ;; Returns the number of (full) characters encoded in memort at (+ POINTER START) 84 ;; and the number of octets used to encode them. (The second value may be less 85 ;; than NOCTETS.) 86 length-of-memory-encoding-function ;(POINTER NOCTETS START) 81 87 82 88 ;; Code units and character codes less than this value map to themselves … … 88 94 ;; that implements this encoding with swapped byte order. 89 95 (use-byte-order-mark nil) 90 ;; Can we reliably (and dumbly) assume that code-units that appear 91 ;; to represent #\u+000a and #\u+000d in fact represent LF and CR ? 92 (allows-line-termination-detection t) 96 ;; What alternate line-termination conventions can be encoded ? (This basically 97 ;; means "can #\Line_Separator be encoded?", since :CR and :CRLF can always 98 ;; be encoded.) 99 (alternate-line-termination-conventions '(:cr :crlf)) 93 100 ;; By what other MIME names is this encoding known ? 94 101 (aliases nil) 95 102 (documentation nil) 96 (encodable-limit char-code-limit)97 103 ) 98 104 … … 102 108 (defconstant swapped-byte-order-mark-char-code (char-code swapped-byte-order-mark)) 103 109 110 111 (defun decode-character-encoded-vector (encoding vector start-index noctets string) 112 (setq encoding (ensure-character-encoding encoding)) 113 (unless (= (the (unsigned-byte 8) (typecode vector)) 114 target::subtag-u8-vector) 115 (report-bad-arg vector '(simple-array (unsigned-byte 8) (*)))) 116 (unless (= (the (unsigned-byte 8) (typecode string)) 117 target::subtag-simple-base-string) 118 (report-bad-arg vector 'simple-string)) 119 (let* ((len (length vector))) 120 (declare (type index len)) 121 (unless (and (typep start-index 'fixnum) 122 (>= (the fixnum start-index) 0) 123 (< (the fixnum start-index) len)) 124 (error "~s is an invalid start index for ~s" start-index vector)) 125 (unless (and (typep noctets 'fixnum) 126 (>= (the fixnum noctets) 0) 127 (<= (+ (the fixnum start-index) (the fixnum noctets)) len)) 128 (error "~S is an invalid octet count for ~s at ~s" noctets vector start-index)) 129 (funcall (character-encoding-vector-decode-function encoding) 130 vector 131 start-index 132 noctets 133 string))) 104 134 105 135 … … 128 158 ;;; whose CHAR-CODE is >= 256 129 159 160 (defun 8-bit-fixed-width-octets-in-string (string start end) 161 (declare (ignore string)) 162 (if (>= end start) 163 (- end start) 164 0)) 165 166 (defun 8-bit-fixed-width-length-of-vector-encoding (vector start end) 167 (declare (ignore vector)) 168 (if (>= end start) 169 (values (- end start) (- end start)) 170 (values 0 0))) 171 172 (defun 8-bit-fixed-width-length-of-memory-encoding (pointer noctets start) 173 (declare (ignore pointer start)) 174 noctets) 175 130 176 (define-character-encoding :iso-8859-1 131 177 "An 8-bit, fixed-width character encoding in which all character … … 136 182 ;; the "null" 8-bit encoding 137 183 :aliases '(nil :iso_8859-1 :latin1 :l1 :ibm819 :cp819 :csISOLatin1) 138 139 184 :stream-encode-function 140 185 (nfunction … … 157 202 (nfunction 158 203 iso-8859-1-vector-encode 159 (lambda (string vector idx &optional (start 0) (end (length string)))204 (lambda (string vector idx start end) 160 205 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 161 206 (fixnum idx)) … … 166 211 (declare (type (mod #x110000) code)) 167 212 (if (>= code 256) 168 ( return nil)169 (progn170 (setf (aref vector idx) code)171 (incf idx)))))))213 (setq code (char-code #\Sub))) 214 (progn 215 (setf (aref vector idx) code) 216 (incf idx)))))) 172 217 :vector-decode-function 173 218 (nfunction 174 219 iso-8859-1-vector-decode 175 (lambda (vector idx n units string)220 (lambda (vector idx noctets string) 176 221 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 177 222 (do* ((i 0 (1+ i)) 178 (len (length vector))179 223 (index idx (1+ index))) 180 ((>= i nunits) (values string index)) 181 (if (>= index len) 182 (return (values nil idx)) 183 (setf (schar string i) (code-char (the (unsigned-byte 8) 184 (aref vector index)))))))) 224 ((>= i noctets) index) 225 (setf (schar string i) (code-char (the (unsigned-byte 8) 226 (aref vector index))))))) 185 227 :memory-encode-function 186 228 (nfunction 187 229 iso-8859-1-memory-encode 188 (lambda (string pointer idx &optional (start 0) (end (length string)))230 (lambda (string pointer idx start end) 189 231 (do* ((i start (1+ i))) 190 232 ((>= i end) idx) … … 192 234 (declare (type (mod #x110000) code)) 193 235 (if (>= code 256) 194 (return nil) 195 (progn 196 (setf (%get-unsigned-byte pointer idx) code) 197 (incf idx))))))) 236 (setq code (char-code #\Sub))) 237 (setf (%get-unsigned-byte pointer idx) code) 238 (incf idx))))) 198 239 :memory-decode-function 199 240 (nfunction 200 241 iso-8859-1-memory-decode 201 (lambda (pointer n units idx string)242 (lambda (pointer noctets idx string) 202 243 (do* ((i 0 (1+ i)) 203 244 (index idx (1+ index))) 204 ((>= i n units) (values string index))245 ((>= i noctets) index) 205 246 (setf (schar string i) (code-char (the (unsigned-byte 8) 206 247 (%get-unsigned-byte pointer index))))))) 207 :units-in-string-function 208 (nfunction 209 iso-8859-1-units-in-string 210 (lambda (string &optional (start 0) (end (length string))) 211 (when (>= end start) 212 (do* ((i start (1+ i))) 213 ((= i end) (- end start)) 214 (let* ((code (char-code (schar string i)))) 215 (declare (type (mod #x110000) code)) 216 (unless (< code 256) (return nil))))))) 248 :octets-in-string-function 249 #'8-bit-fixed-width-octets-in-string 217 250 :length-of-vector-encoding-function 218 (nfunction 219 iso-8859-1-length-of-vector-encoding 220 (lambda (vector &optional (start 0) (end (length vector))) 221 (when (>= end start) 222 (- end start)))) 251 #'8-bit-fixed-width-length-of-vector-encoding 223 252 :length-of-memory-encoding-function 224 (nfunction 225 iso-8859-1-length-of-memory-encoding 226 (lambda (pointer nunits &optional start) 227 (declare (ignore pointer start)) 228 nunits)) 253 #'8-bit-fixed-width-length-of-memory-encoding 229 254 :literal-char-code-limit 256 230 :encodable-limit 256231 255 ) 232 256 … … 258 282 (nfunction 259 283 ascii-vector-encode 260 (lambda (string vector idx &optional (start 0) (end (length string)))284 (lambda (string vector idx start end) 261 285 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 262 286 (fixnum idx)) … … 267 291 (declare (type (mod #x110000) code)) 268 292 (if (>= code 128) 269 (return nil) 270 (progn 271 (setf (aref vector idx) code) 272 (incf idx))))))) 293 (setq code (char-code #\Sub))) 294 (setf (aref vector idx) code) 295 (incf idx))))) 273 296 :vector-decode-function 274 297 (nfunction 275 298 ascii-vector-decode 276 (lambda (vector idx n units string)299 (lambda (vector idx noctets string) 277 300 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 278 301 (do* ((i 0 (1+ i)) 279 (len (length vector))280 302 (index idx (1+ index))) 281 ((>= i nunits) (values string index)) 282 (if (>= index len) 283 (return (values nil idx)) 284 (let* ((code (aref vector index))) 285 (declare (type (unsigned-byte 8) code)) 286 (if (< code 128) 287 (setf (schar string i) code) 288 (return (values nil idx)))))))) 303 ((>= i noctets) index) 304 (let* ((code (aref vector index))) 305 (declare (type (unsigned-byte 8) code)) 306 (when (>= code 128) 307 (setq code (char-code #\Sub))) 308 (setf (schar string i) code))))) 289 309 :memory-encode-function 290 310 (nfunction 291 311 ascii-memory-encode 292 (lambda (string pointer idx &optional (start 0) (end (length string)))312 (lambda (string pointer idx start end) 293 313 (do* ((i start (1+ i))) 294 314 ((>= i end) idx) … … 296 316 (declare (type (mod #x110000) code)) 297 317 (if (>= code 128) 298 (return nil) 299 (progn 300 (setf (%get-unsigned-byte pointer idx) code) 301 (incf idx))))))) 318 (setq code (char-code #\Sub))) 319 (setf (%get-unsigned-byte pointer idx) code) 320 (incf idx))))) 302 321 :memory-decode-function 303 322 (nfunction 304 323 ascii-memory-decode 305 (lambda (pointer n units idx string)324 (lambda (pointer noctets idx string) 306 325 (do* ((i 0 (1+ i)) 307 326 (index idx (1+ index))) 308 ((>= i n units) (values string index))327 ((>= i noctets) index) 309 328 (let* ((code (%get-unsigned-byte pointer index))) 310 329 (declare (type (unsigned-byte 8) code)) 311 (if (< code 128) 312 (setf (schar string i) (code-char code)) 313 (return (values nil idx))))))) 314 :units-in-string-function 315 (nfunction 316 ascii-units-in-string 317 (lambda (string &optional (start 0) (end (length string))) 318 (when (>= end start) 319 (do* ((i start (1+ i))) 320 ((= i end) (- end start)) 321 (let* ((code (char-code (schar string i)))) 322 (declare (type (mod #x110000) code)) 323 (unless (< code 128) (return nil))))))) 330 (if (>= code 128) 331 (setf (schar string i) #\sub) 332 (setf (schar string i) (code-char code))))))) 333 :octets-in-string-function 334 #'8-bit-fixed-width-octets-in-string 324 335 :length-of-vector-encoding-function 325 (nfunction 326 ascii-length-of-vector-encoding 327 (lambda (vector &optional (start 0) (end (length vector))) 328 (when (>= end start) 329 (do* ((i start (1+ i)) 330 (k 0 (1+ k))) 331 ((= i end) k) 332 (when (>= 128 (the (unsigned-byte 8) (aref vector i))) 333 (return nil)))))) 336 #'8-bit-fixed-width-length-of-vector-encoding 334 337 :length-of-memory-encoding-function 335 (nfunction 336 ascii-length-of-memory-encoding 337 (lambda (pointer nunits &optional (start 0)) 338 (do* ((i 0 (1+ i)) 339 (p start (1+ p))) 340 ((= i nunits) nunits) 341 (when (>= 128 (the (unsigned-byte 8) (%get-unsigned-byte pointer p))) 342 (return nil))))) 338 #'8-bit-fixed-width-length-of-memory-encoding 343 339 :literal-char-code-limit 128 344 :encodable-limit 128345 340 ) 346 341 … … 449 444 (nfunction 450 445 iso-8859-2-vector-encode 451 (lambda (string vector idx &optional (start 0) (end (length string)))446 (lambda (string vector idx start end) 452 447 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 453 448 (fixnum idx)) … … 463 458 (the fixnum (- code #x2c0))))))) 464 459 (declare (type (mod #x110000) code)) 465 (if (null c2) 466 (return nil) 467 (progn 468 (setf (aref vector idx) c2) 469 (incf idx))))))) 460 (setf (aref vector idx) (or c2 (char-code #\Sub))) 461 (incf idx))))) 470 462 :vector-decode-function 471 463 (nfunction 472 464 iso-8859-2-vector-decode 473 (lambda (vector idx n units string)465 (lambda (vector idx noctets string) 474 466 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 475 (do* ((i 0 (1+ i)) 476 (len (length vector)) 467 (do* ((i 0 (1+ i)) 477 468 (index idx (1+ index))) 478 ((>= i nunits) (values string index)) 479 (if (>= index len) 480 (return (values nil idx)) 481 (let* ((1st-unit (aref vector index))) 469 ((>= i noctets) index) 470 (let* ((1st-unit (aref vector index))) 482 471 (declare (type (unsigned-byte 8) 1st-unit)) 483 472 (setf (schar string i) 484 473 (if (< 1st-unit #xa0) 485 474 (code-char 1st-unit) 486 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0)))) 487 )))))) 475 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 488 476 :memory-encode-function 489 477 (nfunction 490 478 iso-8859-2-memory-encode 491 (lambda (string pointer idx &optional (start 0) (end (length string)))479 (lambda (string pointer idx start end) 492 480 (do* ((i start (1+ i))) 493 481 ((>= i end) idx) … … 501 489 (the fixnum (- code #x2c0))))))) 502 490 (declare (type (mod #x110000) code)) 503 (if (null c2) 504 (return nil) 505 (progn 506 (setf (%get-unsigned-byte pointer idx) c2) 507 (1+ idx))))))) 491 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 492 (1+ idx))))) 508 493 :memory-decode-function 509 494 (nfunction 510 495 iso-8859-2-memory-decode 511 (lambda (pointer n units idx string)496 (lambda (pointer noctets idx string) 512 497 (do* ((i 0 (1+ i)) 513 498 (index idx (1+ index))) 514 ((>= i n units) (values string index))499 ((>= i noctets) index) 515 500 (let* ((1st-unit (%get-unsigned-byte pointer index))) 516 501 (declare (type (unsigned-byte 8) 1st-unit)) … … 519 504 (code-char 1st-unit) 520 505 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 521 :units-in-string-function 522 (nfunction 523 iso-8859-2-units-in-string 524 (lambda (string &optional (start 0) (end (length string))) 525 (when (>= end start) 526 (do* ((i start (1+ i))) 527 ((= i end) (- end start)) 528 (let* ((code (char-code (schar string i))) 529 (c2 (cond ((< code #xa0) code) 530 ((< code #x180) 531 (svref *unicode-00a0-0180-to-iso8859-2* 532 (the fixnum (- code #xa0)))) 533 ((and (>= code #x2c0) (< code #x2e0)) 534 (svref *unicode-00c0-00e0-to-iso8859-2* 535 (the fixnum (- code #x2c0))))))) 536 (declare (type (mod #x110000) code)) 537 (unless c2 (return nil))))))) 506 :octets-in-string-function 507 #'8-bit-fixed-width-octets-in-string 538 508 :length-of-vector-encoding-function 539 (nfunction 540 iso-8859-2-length-of-vector-encoding 541 (lambda (vector &optional (start 0) (end (length vector))) 542 (when (>= end start) 543 (- end start)))) 509 #'8-bit-fixed-width-length-of-vector-encoding 544 510 :length-of-memory-encoding-function 545 (nfunction 546 iso-8859-2-length-of-memory-encoding 547 (lambda (pointer nunits &optional start) 548 (declare (ignore pointer start)) 549 nunits)) 511 #'8-bit-fixed-width-length-of-memory-encoding 550 512 :literal-char-code-limit #xa0 551 513 ) … … 652 614 (nfunction 653 615 iso-8859-3-vector-encode 654 (lambda (string vector idx &optional (start 0) (end (length string)))616 (lambda (string vector idx start end) 655 617 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 656 618 (fixnum idx)) … … 668 630 ((and (>= code #x2d8) (< code #x2e0)) 669 631 (svref *unicode-2d8-2e0-to-iso8859-3* 670 (the fixnum (- code #x2d8))))))) 632 633 (the fixnum (- code #x2d8))))))) 671 634 (declare (type (mod #x110000) code)) 672 (if (null c2) 673 (return nil) 674 (progn 675 (setf (aref vector idx) c2) 676 (incf idx))))))) 635 (setf (aref vector idx) (or c2 (char-code #\Sub))) 636 (incf idx))))) 677 637 :vector-decode-function 678 638 (nfunction 679 639 iso-8859-3-vector-decode 680 (lambda (vector idx n units string)640 (lambda (vector idx noctets string) 681 641 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 682 642 (do* ((i 0 (1+ i)) 683 (len (length vector))684 643 (index idx (1+ index))) 685 ((>= i nunits) (values string index)) 686 (if (>= index len) 687 (return (values nil idx)) 644 ((>= i noctets) index) 688 645 (let* ((1st-unit (aref vector index))) 689 646 (declare (type (unsigned-byte 8) 1st-unit)) 690 647 (setf (schar string i) 691 (if (< 1st-unit #xa0)692 (code-char 1st-unit)693 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0))))))))))648 (if (< 1st-unit #xa0) 649 (code-char 1st-unit) 650 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 694 651 :memory-encode-function 695 652 (nfunction 696 653 iso-8859-3-memory-encode 697 (lambda (string pointer idx &optional (start 0) (end (length string)))654 (lambda (string pointer idx start end) 698 655 (do* ((i start (1+ i))) 699 656 ((>= i end) idx) … … 710 667 (the fixnum (- code #x2d8))))))) 711 668 (declare (type (mod #x110000) code)) 712 (if (null c2) 713 (return nil) 714 (progn 715 (setf (%get-unsigned-byte pointer idx) c2) 716 (incf idx))))))) 669 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 670 (incf idx))))) 717 671 :memory-decode-function 718 672 (nfunction 719 673 iso-8859-3-memory-decode 720 (lambda (pointer n units idx string)674 (lambda (pointer noctets idx string) 721 675 (do* ((i 0 (1+ i)) 722 676 (index idx (1+ index))) 723 ((>= i n units) (values string index))677 ((>= i noctets) index) 724 678 (let* ((1st-unit (%get-unsigned-byte pointer index))) 725 679 (declare (type (unsigned-byte 8) 1st-unit)) … … 728 682 (code-char 1st-unit) 729 683 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 730 :units-in-string-function 731 (nfunction 732 iso-8859-3-units-in-string 733 (lambda (string &optional (start 0) (end (length string))) 734 (when (>= end start) 735 (do* ((i start (1+ i))) 736 ((= i end) (- end start)) 737 (let* ((code (char-code (schar string i))) 738 (c2 (cond ((< code #xa0) code) 739 ((< code #x100) 740 (svref *unicode-a0-100-to-iso8859-3* 741 (the fixnum (- code #xa0)))) 742 ((and (>= code #x108) (< code #x180)) 743 (svref *unicode-108-180-to-iso8859-3* 744 (the fixnum (- code #x108)))) 745 ((and (>= code #x2d8) (< code #x2e0)) 746 (svref *unicode-2d8-2e0-to-iso8859-3* 747 (the fixnum (- code #x2d8))))))) 748 (declare (type (mod #x110000) code)) 749 (unless c2 (return nil))))))) 684 :octets-in-string-function 685 #'8-bit-fixed-width-octets-in-string 750 686 :length-of-vector-encoding-function 751 (nfunction 752 iso-8859-3-length-of-vector-encoding 753 (lambda (vector &optional (start 0) (end (length vector))) 754 (when (>= end start) 755 (- end start)))) 687 #'8-bit-fixed-width-length-of-vector-encoding 756 688 :length-of-memory-encoding-function 757 (nfunction 758 iso-8859-3-length-of-memory-encoding 759 (lambda (pointer nunits &optional start) 760 (declare (ignore pointer start)) 761 nunits)) 689 #'8-bit-fixed-width-length-of-memory-encoding 762 690 :literal-char-code-limit #xa0 763 691 ) … … 864 792 (nfunction 865 793 iso-8859-4-vector-encode 866 (lambda (string vector idx &optional (start 0) (end (length string)))794 (lambda (string vector idx start end) 867 795 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 868 796 (fixnum idx)) … … 879 807 (the fixnum (- code #x2c0))))))) 880 808 (declare (type (mod #x110000) code)) 881 (if (null c2) 882 (return nil) 883 (progn 884 (setf (aref vector idx) c2) 885 (incf idx))))))) 809 (setf (aref vector idx) (or c2 (char-code #\Sub))) 810 (incf idx))))) 886 811 :vector-decode-function 887 812 (nfunction 888 813 iso-8859-4-vector-decode 889 (lambda (vector idx n units string)814 (lambda (vector idx noctets string) 890 815 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 891 816 (do* ((i 0 (1+ i)) 892 (len (length vector))893 817 (index idx (1+ index))) 894 ((>= i nunits) (values string index)) 895 (if (>= index len) 896 (return (values nil idx)) 897 (let* ((1st-unit (aref vector index))) 898 (declare (type (unsigned-byte 8) 1st-unit)) 899 (setf (schar string i) 900 (if (< 1st-unit #xa0) 901 (code-char 1st-unit) 902 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0)))))))))) 818 ((>= i noctets) index) 819 (let* ((1st-unit (aref vector index))) 820 (declare (type (unsigned-byte 8) 1st-unit)) 821 (setf (schar string i) 822 (if (< 1st-unit #xa0) 823 (code-char 1st-unit) 824 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 903 825 :memory-encode-function 904 826 (nfunction 905 827 iso-8859-4-memory-encode 906 (lambda (string pointer idx &optional (start 0) (end (length string)))828 (lambda (string pointer idx start end) 907 829 (do* ((i start (1+ i))) 908 830 ((>= i end) idx) … … 916 838 (the fixnum (- code #x2c0))))))) 917 839 (declare (type (mod #x110000) code)) 918 (if (null c2) 919 (return nil) 920 (progn 921 (setf (%get-unsigned-byte pointer idx) c2) 922 (incf idx))))))) 840 (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub))) 841 (incf idx))))) 923 842 :memory-decode-function 924 843 (nfunction 925 844 iso-8859-4-memory-decode 926 (lambda (pointer n units idx string)845 (lambda (pointer noctets idx string) 927 846 (do* ((i 0 (1+ i)) 928 847 (index idx (1+ index))) 929 ((>= i n units) (values string index))848 ((>= i noctets) index) 930 849 (let* ((1st-unit (%get-unsigned-byte pointer index))) 931 850 (declare (type (unsigned-byte 8) 1st-unit)) … … 934 853 (code-char 1st-unit) 935 854 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 936 :units-in-string-function 937 (nfunction 938 iso-8859-4-units-in-string 939 (lambda (string &optional (start 0) (end (length string))) 940 (when (>= end start) 941 (do* ((i start (1+ i))) 942 ((= i end) (- end start)) 943 (let* ((code (char-code (schar string i))) 944 (c2 (cond ((< code #xa0) code) 945 ((< code #x180) 946 (svref *unicode-a0-180-to-iso8859-4* 947 (the fixnum (- code #xa0)))) 948 ((and (>= code #x2d8) (< code #x2e0)) 949 (svref *unicode-2c0-2e0-to-iso8859-4* 950 (the fixnum (- code #x2c0))))) )) 951 (declare (type (mod #x110000) code)) 952 (unless c2 (return nil))))))) 855 :octets-in-string-function 856 #'8-bit-fixed-width-octets-in-string 953 857 :length-of-vector-encoding-function 954 (nfunction 955 iso-8859-4-length-of-vector-encoding 956 (lambda (vector &optional (start 0) (end (length vector))) 957 (when (>= end start) 958 (- end start)))) 858 #'8-bit-fixed-width-length-of-vector-encoding 959 859 :length-of-memory-encoding-function 960 (nfunction 961 iso-8859-4-length-of-memory-encoding 962 (lambda (pointer nunits &optional start) 963 (declare (ignore pointer start)) 964 nunits)) 860 #'8-bit-fixed-width-length-of-memory-encoding 965 861 :literal-char-code-limit #xa0 966 862 ) … … 1017 913 (if (< 1st-unit #x80) 1018 914 (code-char 1st-unit) 1019 ( when(>= 1st-unit #xc2)915 (if (>= 1st-unit #xc2) 1020 916 (let* ((s1 (funcall next-unit-function stream))) 1021 917 (if (eq s1 :eof) … … 1076 972 (the fixnum (logxor s3 #x80)))))) 1077 973 #\Replacement_Character)))) 1078 #\Replacement_Character))))))))))))) 974 #\Replacement_Character))))))))) 975 #\Replacement_Character)))) 1079 976 :vector-encode-function 1080 977 (nfunction 1081 978 utf-8-vector-encode 1082 (lambda (string vector idx &optional (start 0) (end (length string)))979 (lambda (string vector idx start end) 1083 980 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1084 981 (fixnum idx)) … … 1094 991 (setf (aref vector idx) 1095 992 (logior #xc0 (the fixnum (ash code -6)))) 1096 (incf idx) 1097 (setf (aref vector idx) 993 (setf (aref vector (the fixnum (1+ idx))) 1098 994 (logior #x80 (the fixnum (logand code #x3f)))) 1099 (incf idx ))995 (incf idx 2)) 1100 996 ((< code #x10000) 1101 997 (setf (aref vector idx) 1102 998 (logior #xe0 (the fixnum (ash code -12)))) 1103 (incf idx) 1104 (setf (aref vector idx) 999 (setf (aref vector (the fixnum (1+ idx))) 1105 1000 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 1106 (incf idx) 1107 (setf (aref vector idx) 1001 (setf (aref vector (the fixnum (+ idx 2))) 1108 1002 (logior #x80 (the fixnum (logand code #x3f)))) 1109 (incf idx ))1003 (incf idx 3)) 1110 1004 (t 1111 1005 (setf (aref vector idx) 1112 1006 (logior #xf0 1113 1007 (the fixnum (logand #x7 (the fixnum (ash code -18)))))) 1114 (incf idx) 1115 (setf (aref vector idx) 1008 (setf (aref vector (the fixnum (1+ idx))) 1116 1009 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -12)))))) 1117 (incf idx) 1118 (setf (aref vector idx) 1010 (setf (aref vector (the fixnum (+ idx 2))) 1119 1011 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 1120 (incf idx) 1121 (setf (aref vector idx) (logand #x3f code)) 1122 (incf idx))))))) 1012 (setf (aref vector (the fixnum (+ idx 3))) (logand #x3f code)) 1013 (incf idx 4))))))) 1123 1014 :vector-decode-function 1124 1015 (nfunction 1125 1016 utf-8-vector-decode 1126 (lambda (vector idx n units string)1017 (lambda (vector idx noctets string) 1127 1018 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1128 1019 (type index idx)) 1129 1020 (do* ((i 0 (1+ i)) 1130 ( len (length vector))1021 (end (+ idx noctets)) 1131 1022 (index idx (1+ index))) 1132 ((>= i nunits) (values string index)) 1133 (if (>= index len) 1134 (values nil idx) 1023 ((= index end) index) 1135 1024 (let* ((1st-unit (aref vector index))) 1136 1025 (declare (type (unsigned-byte 8) 1st-unit)) … … 1139 1028 (code-char 1st-unit) 1140 1029 (if (>= 1st-unit #xc2) 1141 (let* ((2nd-unit (aref vector (incf index)))) 1142 (declare (type (unsigned-byte 8) 2nd-unit)) 1143 (if (< 1st-unit #xe0) 1144 (if (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1145 (code-char 1146 (logior 1147 (the fixnum (ash (the fixnum (logand #x1f 1st-unit)) 6)) 1148 (the fixnum (logxor 2nd-unit #x80))))) 1149 (let* ((3rd-unit (aref vector (incf index)))) 1150 (declare (type (unsigned-byte 8) 3rd-unit)) 1151 (if (< 1st-unit #xf0) 1152 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1153 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 1154 (or (>= 1st-unit #xe1) 1155 (>= 2nd-unit #xa0))) 1156 (code-char (the fixnum 1157 (logior (the fixnum 1158 (ash (the fixnum (logand 1st-unit #xf)) 1159 12)) 1160 (the fixnum 1161 (logior 1162 (the fixnum 1163 (ash (the fixnum (logand 2nd-unit #x3f)) 1164 6)) 1165 (the fixnum (logand 3rd-unit #x3f)))))))) 1166 (let* ((4th-unit (aref vector (incf index)))) 1167 (declare (type (unsigned-byte 8) 4th-unit)) 1030 (let* ((2nd-unit (aref vector (incf index)))) 1031 (declare (type (unsigned-byte 8) 2nd-unit)) 1032 (if (< 1st-unit #xe0) 1033 (if (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1034 (code-char 1035 (logior 1036 (the fixnum (ash (the fixnum (logand #x1f 1st-unit)) 6)) 1037 (the fixnum (logxor 2nd-unit #x80))))) 1038 (let* ((3rd-unit (aref vector (incf index)))) 1039 (declare (type (unsigned-byte 8) 3rd-unit)) 1040 (if (< 1st-unit #xf0) 1168 1041 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1169 1042 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 1170 (< (the fixnum (logxor 4th-unit #x80)) #x40) 1171 (or (>= 1st-unit #xf1) 1172 (>= 2nd-unit #x90))) 1173 (code-char 1174 (logior 1175 (the fixnum 1176 (logior 1177 (the fixnum 1178 (ash (the fixnum (logand 1st-unit 7)) 18)) 1179 (the fixnum 1180 (ash (the fixnum (logxor 2nd-unit #x80)) 12)))) 1181 (the fixnum 1182 (logior 1183 (the fixnum 1184 (ash (the fixnum (logxor 3rd-unit #x80)) 6)) 1185 (the fixnum (logxor 4th-unit #x80)))))))))))))))) 1186 (if char 1187 (setf (schar string i) char) 1188 (return (values nil idx))))))))) 1043 (or (>= 1st-unit #xe1) 1044 (>= 2nd-unit #xa0))) 1045 (code-char (the fixnum 1046 (logior (the fixnum 1047 (ash (the fixnum (logand 1st-unit #xf)) 1048 12)) 1049 (the fixnum 1050 (logior 1051 (the fixnum 1052 (ash (the fixnum (logand 2nd-unit #x3f)) 1053 6)) 1054 (the fixnum (logand 3rd-unit #x3f)))))))) 1055 (let* ((4th-unit (aref vector (incf index)))) 1056 (declare (type (unsigned-byte 8) 4th-unit)) 1057 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1058 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 1059 (< (the fixnum (logxor 4th-unit #x80)) #x40) 1060 (or (>= 1st-unit #xf1) 1061 (>= 2nd-unit #x90))) 1062 (code-char 1063 (logior 1064 (the fixnum 1065 (logior 1066 (the fixnum 1067 (ash (the fixnum (logand 1st-unit 7)) 18)) 1068 (the fixnum 1069 (ash (the fixnum (logxor 2nd-unit #x80)) 12)))) 1070 (the fixnum 1071 (logior 1072 (the fixnum 1073 (ash (the fixnum (logxor 3rd-unit #x80)) 6)) 1074 (the fixnum (logxor 4th-unit #x80)))))))))))))))) 1075 (setf (schar string i) (or char #\Replacement_Character))))))) 1189 1076 :memory-encode-function 1190 1077 (nfunction 1191 1078 utf-8-memory-encode 1192 (lambda (string pointer idx &optional (start 0) (end (length string)))1079 (lambda (string pointer idx start end) 1193 1080 (declare (fixnum idx)) 1194 1081 (do* ((i start (1+ i))) … … 1202 1089 (setf (%get-unsigned-byte pointer idx) 1203 1090 (logior #xc0 (the fixnum (ash code -6)))) 1204 (incf idx) 1205 (setf (%get-unsigned-byte pointer idx) 1091 (setf (%get-unsigned-byte pointer (the fixnum (1+ idx))) 1206 1092 (logior #x80 (the fixnum (logand code #x3f)))) 1207 (incf idx ))1093 (incf idx 2)) 1208 1094 ((< code #x10000) 1209 1095 (setf (%get-unsigned-byte pointer idx) 1210 1096 (logior #xe0 (the fixnum (ash code -12)))) 1211 (incf idx) 1212 (setf (%get-unsigned-byte pointer idx) 1097 (setf (%get-unsigned-byte pointer (the fixnum (1+ idx))) 1213 1098 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 1214 (incf idx) 1215 (setf (%get-unsigned-byte pointer idx) 1099 (setf (%get-unsigned-byte pointer (the fixnum (+ idx 2))) 1216 1100 (logior #x80 (the fixnum (logand code #x3f)))) 1217 (incf idx ))1101 (incf idx 3)) 1218 1102 (t 1219 1103 (setf (%get-unsigned-byte pointer idx) 1220 1104 (logior #xf0 1221 1105 (the fixnum (logand #x7 (the fixnum (ash code -18)))))) 1222 (incf idx) 1223 (setf (%get-unsigned-byte pointer idx) 1106 (setf (%get-unsigned-byte pointer (the fixnum (1+ idx))) 1224 1107 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -12)))))) 1225 (incf idx) 1226 (setf (%get-unsigned-byte pointer idx) 1108 (setf (%get-unsigned-byte pointer (the fixnum (+ idx 2))) 1227 1109 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 1228 (incf idx) 1229 (setf (%get-unsigned-byte pointer idx) 1110 (setf (%get-unsigned-byte pointer (the fixnum (+ idx 3))) 1230 1111 (logand #x3f code)) 1231 (incf idx )))))))1112 (incf idx 4))))))) 1232 1113 :memory-decode-function 1233 1114 (nfunction 1234 1115 utf-8-memory-decode 1235 (lambda (pointer n units idx string)1236 (declare (fixnum n units idx))1116 (lambda (pointer noctets idx string) 1117 (declare (fixnum noctets idx)) 1237 1118 (do* ((i 0 (1+ i)) 1119 (end (+ idx noctets)) 1238 1120 (index idx (1+ index))) 1239 ((>= i nunits) (values string index))1121 ((>= index end) (if (= index end) index 0)) 1240 1122 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1241 1123 (declare (type (unsigned-byte 8) 1st-unit)) … … 1289 1171 (ash (the fixnum (logxor 3rd-unit #x80)) 6)) 1290 1172 (the fixnum (logxor 4th-unit #x80))))))))))))))))) 1291 (if char 1292 (setf (schar string i) char) 1293 (return (values nil idx)))))))) 1294 :units-in-string-function 1173 (setf (schar string i) (or char #\Replacement_Character))))))) 1174 :octets-in-string-function 1295 1175 (nfunction 1296 utf-8- units-in-string1297 (lambda (string &optional (start 0) (end (length string)))1298 ( when(>= end start)1299 (do* ((n units 0)1176 utf-8-octets-in-string 1177 (lambda (string start end) 1178 (if (>= end start) 1179 (do* ((noctets 0) 1300 1180 (i start (1+ i))) 1301 ((= i end) n units)1302 (declare (fixnum n units))1181 ((= i end) noctets) 1182 (declare (fixnum noctets)) 1303 1183 (let* ((code (char-code (schar string i)))) 1304 1184 (declare (type (mod #x110000) code)) 1305 (incf n units1185 (incf noctets 1306 1186 (if (< code #x80) 1307 1187 1 … … 1310 1190 (if (< code #x10000) 1311 1191 3 1312 4))))))))) 1192 4)))))) 1193 0))) 1313 1194 :length-of-vector-encoding-function 1314 1195 (nfunction 1315 1196 utf-8-length-of-vector-encoding 1316 (lambda (vector &optional (start 0) (end (length vector)))1197 (lambda (vector start end) 1317 1198 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1318 1199 (do* ((i start) 1319 (nchars 0 (1+ nchars)))1200 (nchars 0)) 1320 1201 ((>= i end) 1321 (if (= i end) nchars)) 1322 (let* ((code (aref vector i))) 1202 (if (= i end) (values nchars i))) 1203 (declare (fixnum i)) 1204 (let* ((code (aref vector i)) 1205 (nexti (+ i (cond ((< code #x80) 1) 1206 ((< code #xe0) 2) 1207 ((< code #xf0) 3) 1208 (t 4))))) 1323 1209 (declare (type (unsigned-byte 8) code)) 1324 (incf i 1325 (cond ((< code #x80) 1) 1326 ((< code #xe0) 2) 1327 ((< code #xf0) 3) 1328 (t 4))))))) 1210 (if (> nexti end) 1211 (return (values nchars i)) 1212 (setq nchars (1+ nchars) i nexti)))))) 1329 1213 :length-of-memory-encoding-function 1330 1214 (nfunction 1331 1215 utf-8-length-of-memory-encoding 1332 (lambda (pointer n units &optional (start 0))1216 (lambda (pointer noctets start) 1333 1217 (do* ((i start) 1218 (end (+ start noctets)) 1334 1219 (nchars 0 (1+ nchars))) 1335 ((>= i nunits) 1336 (if (= i nunits) nchars)) 1337 (let* ((code (%get-unsigned-byte pointer i))) 1220 ((= i end) (values nchars i)) 1221 (let* ((code (%get-unsigned-byte pointer i)) 1222 (nexti (+ i (cond ((< code #x80) 1) 1223 ((< code #xe0) 2) 1224 ((< code #xf0) 3) 1225 (t 4))))) 1338 1226 (declare (type (unsigned-byte 8) code)) 1339 (incf i 1340 (cond ((< code #x80) 1) 1341 ((< code #xe0) 2) 1342 ((< code #xf0) 3) 1343 (t 4))))))) 1227 (if (> nexti end) 1228 (return (values nchars i)) 1229 (setq nchars (1+ nchars) i nexti)))))) 1344 1230 :literal-char-code-limit #x80 1345 1231 ) 1232 1346 1233 1347 1234 ;;; For a code-unit-size greater than 8: the stream-encode function's write-function … … 1352 1239 1353 1240 1241 (defmacro utf-16-combine-surrogate-pairs (a b) 1242 `(code-char 1243 (the (unsigned-byte 21) 1244 (+ #x10000 1245 (the (unsigned-byte 20) 1246 (logior 1247 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1248 (- ,a #xd800)) 1249 10)) 1250 (the (unsigned-byte 10) (- ,b #xdc00)))))))) 1251 1354 1252 (defun utf-16-stream-encode (char write-function stream) 1355 1253 (let* ((code (char-code char)) … … 1378 1276 (if (and (>= 2nd-unit #xdc00) 1379 1277 (< 2nd-unit #xe000)) 1380 (code-char (the (unsigned-byte 21) 1381 (logior 1382 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1383 (- 1st-unit #xd800)) 1384 10)) 1385 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))) 1386 #\Replacement_Character))))))) 1387 1388 1389 (defun utf-16-units-in-string (string &optional (start 0) (end (length string))) 1390 (when (>= end start) 1391 (do* ((nunits 0) 1278 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit) 1279 #\Replacement_Character)))) 1280 #\Replacement_Character))) 1281 1282 1283 (defun utf-16-octets-in-string (string start end) 1284 (if (>= end start) 1285 (do* ((noctets 0) 1392 1286 (i start (1+ i))) 1393 ((= i end) n units)1394 (declare (fixnum n units))1287 ((= i end) noctets) 1288 (declare (fixnum noctets)) 1395 1289 (let* ((code (char-code (schar string i)))) 1396 1290 (declare (type (mod #x110000) code)) 1397 (incf n units1291 (incf noctets 1398 1292 (if (< code #x10000) 1399 1 1400 2)))))) 1293 2 1294 4)))) 1295 0)) 1296 1297 1298 (declaim (inline %big-endian-u8-ref-u16 %little-endian-u8-ref-u16)) 1299 (defun %big-endian-u8-ref-u16 (u8-vector idx) 1300 (declare (type (simple-array (unsigned-byte 8) (*)) u8-vector) 1301 (fixnum idx)) 1302 (logior (the (unsigned-byte 16) (ash (the (unsigned-byte 8) (aref u8-vector idx)) 8)) 1303 (the (unsigned-byte 8) (aref u8-vector (the fixnum (1+ idx)))))) 1304 1305 (defun %little-endian-u8-ref-u16 (u8-vector idx) 1306 (declare (type (simple-array (unsigned-byte 8) (*)) u8-vector) 1307 (fixnum idx)) 1308 (logior (the (unsigned-byte 16) (ash (the (unsigned-byte 8) 1309 (aref u8-vector (the fixnum (1+ idx)))) 8)) 1310 (the (unsigned-byte 8) (aref u8-vector idx)))) 1311 1312 #+big-endian-target 1313 (progn 1314 (defmacro %native-u8-ref-u16 (vector idx) 1315 `(%big-endian-u8-ref-u16 ,vector ,idx)) 1316 1317 (defmacro %reversed-u8-ref-u16 (vector idx) 1318 `(%little-endian-u8-ref-u16 ,vector ,idx)) 1319 ) 1320 1321 #+little-endian-target 1322 (progn 1323 (defmacro %native-u8-ref-u16 (vector idx) 1324 `(%little-endian-u8-ref-u16 ,vector ,idx)) 1325 1326 (defmacro %reversed-u8-ref-u16 (vector idx) 1327 `(%big-endian-u8-ref-u16 ,vector ,idx)) 1328 ) 1329 1330 1331 (declaim (inline (setf %big-endian-u8-ref-u16) (setf %little-endian-u8-ref-u16))) 1332 (defun (setf %big-endian-u8-ref-u16) (val u8-vector idx) 1333 (declare (type (unsigned-byte 16) val) 1334 (type (simple-array (unsigned-byte 8) (*)) u8-vector) 1335 (fixnum idx)) 1336 (setf (aref u8-vector idx) (ldb (byte 8 8) val) 1337 (aref u8-vector (the fixnum (1+ idx))) (ldb (byte 8 0) val)) 1338 val) 1339 1340 (defun (setf %little-endian-u8-ref-u16) (val u8-vector idx) 1341 (declare (type (unsigned-byte 16) val) 1342 (type (simple-array (unsigned-byte 8) (*)) u8-vector) 1343 (fixnum idx)) 1344 (setf (aref u8-vector idx) (ldb (byte 8 0) val) 1345 (aref u8-vector (the fixnum (1+ idx))) (ldb (byte 8 8) val)) 1346 val) 1347 1401 1348 1402 1349 ;;; utf-16, native byte order. … … 1426 1373 (nfunction 1427 1374 native-utf-16-vector-encode 1428 (lambda (string vector idx &optional (start 0) (end (length string)))1429 (declare (type (simple-array (unsigned-byte 16) (*)) vector)1430 (fixnum idx ))1375 (lambda (string vector idx start end) 1376 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1377 (fixnum idx start end)) 1431 1378 (do* ((i start (1+ i))) 1432 1379 ((>= i end) idx) 1380 (declare (fixnum i)) 1433 1381 (let* ((char (schar string i)) 1434 1382 (code (char-code char)) … … 1437 1385 (fixnum highbits)) 1438 1386 (cond ((< highbits 0) 1439 (setf ( arefvector idx) code)1440 (incf idx ))1387 (setf (%native-u8-ref-u16 vector idx) code) 1388 (incf idx 2)) 1441 1389 (t 1442 (setf (aref vector idx) (logior #xd800 (the fixnum (ash highbits -10)))) 1443 (incf idx) 1444 (setf (aref vector ) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1445 (incf idx))))))) 1390 (let* ((firstword (logior #xd800 (the fixnum (ash highbits -10)))) 1391 (secondword (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1392 (declare (type (unsigned-byte 16) firstword secondword)) 1393 (setf (%native-u8-ref-u16 vector idx) firstword 1394 (%native-u8-ref-u16 vector (the fixnum (+ idx 2))) secondword) 1395 (incf idx 4)))))))) 1446 1396 :vector-decode-function 1447 1397 (nfunction 1448 1398 native-utf-16-vector-decode 1449 (lambda (vector idx n units string)1450 (declare (type (simple-array (unsigned-byte 16) (*)) vector)1399 (lambda (vector idx noctets string) 1400 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1451 1401 (type index idx)) 1452 1402 (do* ((i 0 (1+ i)) 1453 ( len (length vector))1454 (index idx (1+ index)))1455 (( >= i nunits) (values string index))1403 (end (+ idx noctets)) 1404 (index idx)) 1405 ((= index end) index) 1456 1406 (declare (fixnum i len index)) 1457 (if (>= index len) 1458 (values nil idx) 1459 (let* ((1st-unit (aref vector index))) 1460 (declare (type (unsigned-byte 16) 1st-unit)) 1461 (let* ((char 1462 (if (or (< 1st-unit #xd800) 1463 (>= 1st-unit #xe000)) 1464 (code-char 1st-unit) 1465 (if (< 1st-unit #xdc00) 1466 (let* ((2nd-unit (aref vector (incf index)))) 1467 (declare (type (unsigned-byte 16) 2nd-unit)) 1468 (if (and (>= 2nd-unit #xdc00) 1469 (< 2nd-unit #xe000)) 1470 (code-char (the (unsigned-byte 21) 1471 (logior 1472 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1473 (- 1st-unit #xd800)) 1474 10)) 1475 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1476 (if char 1477 (setf (schar string i) char) 1478 (return (values nil idx))))))))) 1479 :memory-encode-function 1480 (nfunction 1481 native-utf-16-memory-encode 1482 (lambda (string pointer idx &optional (start 0) (end (length string))) 1483 (declare (fixnum idx)) 1484 (do* ((i start (1+ i))) 1485 ((>= i end) idx) 1486 (let* ((code (char-code (schar string i))) 1487 (highbits (- code #x10000)) 1488 (p (+ idx idx))) 1489 (declare (type (mod #x110000) code) 1490 (fixnum p highbits)) 1491 (cond ((< highbits 0) 1492 (setf (%get-unsigned-word pointer p) code) 1493 (incf idx) 1494 (incf p 2)) 1495 1496 (t 1497 (setf (%get-unsigned-word pointer p) (logior #xd800 (the fixnum (ash highbits -10)))) 1498 (incf idx) 1499 (incf p 2) 1500 (setf (%get-unsigned-word pointer p) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1501 (incf idx) 1502 (incf p 2))))))) 1503 :memory-decode-function 1504 (nfunction 1505 native-utf-16-memory-decode 1506 (lambda (pointer nunits idx string) 1507 (declare (fixnum nunits idx)) 1508 (do* ((i 0 (1+ i)) 1509 (index idx (1+ index)) 1510 (p (+ index index) (+ p 2))) 1511 ((>= i nunits) (values string index)) 1512 (declare (fixnum i index p)) 1513 (let* ((1st-unit (%get-unsigned-word pointer p))) 1407 (let* ((1st-unit (%native-u8-ref-u16 vector index))) 1514 1408 (declare (type (unsigned-byte 16) 1st-unit)) 1409 (incf index 2) 1515 1410 (let* ((char 1516 1411 (if (or (< 1st-unit #xd800) … … 1518 1413 (code-char 1st-unit) 1519 1414 (if (< 1st-unit #xdc00) 1520 (let* ((2nd-unit (%get-unsigned-byte pointer (incf p 2)))) 1415 (let* ((2nd-unit (%native-u8-ref-u16 vector index))) 1416 (declare (type (unsigned-byte 16) 2nd-unit)) 1417 (incf index 2) 1418 (if (and (>= 2nd-unit #xdc00) 1419 (< 2nd-unit #xe000)) 1420 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit))))))) 1421 (setf (schar string i) (or char #\Replacement_Character))))))) 1422 :memory-encode-function 1423 (nfunction 1424 native-utf-16-memory-encode 1425 (lambda (string pointer idx start end) 1426 (declare (fixnum idx)) 1427 (do* ((i start (1+ i))) 1428 ((>= i end) idx) 1429 (let* ((code (char-code (schar string i))) 1430 (highbits (- code #x10000))) 1431 (declare (type (mod #x110000) code) 1432 (fixnum highbits)) 1433 (cond ((< highbits 0) 1434 (setf (%get-unsigned-word pointer idx) code) 1435 (incf idx 2)) 1436 (t 1437 (setf (%get-unsigned-word pointer idx) (logior #xd800 (the fixnum (ash highbits -10)))) 1438 (incf idx 2) 1439 (setf (%get-unsigned-word pointer idx) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1440 (incf idx 2))))))) 1441 :memory-decode-function 1442 (nfunction 1443 native-utf-16-memory-decode 1444 (lambda (pointer noctets idx string) 1445 (declare (fixnum noctets idx)) 1446 (do* ((i 0 (1+ i)) 1447 (end (+ idx noctets)) 1448 (index idx)) 1449 ((>= index end) index) 1450 (declare (fixnum i index p)) 1451 (let* ((1st-unit (%get-unsigned-word pointer index))) 1452 (declare (type (unsigned-byte 16) 1st-unit)) 1453 (incf index 2) 1454 (let* ((char 1455 (if (or (< 1st-unit #xd800) 1456 (>= 1st-unit #xe000)) 1457 (code-char 1st-unit) 1458 (if (< 1st-unit #xdc00) 1459 (let* ((2nd-unit (%get-unsigned-word pointer index))) 1521 1460 (declare (type (unsigned-byte 16) 2nd-unit)) 1522 1461 (incf index) 1523 1462 (if (and (>= 2nd-unit #xdc00) 1524 1463 (< 2nd-unit #xe000)) 1525 (code-char (the (unsigned-byte 21) 1526 (logior 1527 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1528 (- 1st-unit #xd800)) 1529 10)) 1530 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1531 (if char 1532 (setf (schar string i) char) 1533 (return (values nil idx)))))))) 1534 :units-in-string-function 1535 #'utf-16-units-in-string 1464 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit))))))) 1465 (setf (schar string i) (or char #\Replacement_Character))))))) 1466 :octets-in-string-function 1467 #'utf-16-octets-in-string 1536 1468 :length-of-vector-encoding-function 1537 1469 (nfunction 1538 1470 native-utf-16-length-of-vector-encoding 1539 (lambda (vector &optional (start 0) (end (length vector))) 1540 (declare (type (simple-array (unsigned-byte 16) (*)) vector)) 1471 (lambda (vector start end) 1472 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1473 (declare (fixnum start end)) 1541 1474 (do* ((i start) 1542 (nchars 0 (1+ nchars))) 1543 ((>= i end) 1544 (if (= i end) nchars)) 1545 (let* ((code (aref vector i))) 1546 (declare (type (unsigned-byte 16) code)) 1547 (incf i 1548 (if (or (< code #xd800) 1549 (>= code #xe000)) 1550 1 1551 2)))))) 1475 (j (+ 2 i) (+ 2 i)) 1476 (nchars 0)) 1477 ((> j end) (values nchars i)) 1478 (declare (fixnum i j nchars)) 1479 (let* ((code (%native-u8-ref-u16 vector i)) 1480 (nexti (+ i (if (or (< code #xd800) 1481 (>= code #xdc00)) 1482 2 1483 4)))) 1484 (declare (type (unsigned-byte 16) code) 1485 (fixnum nexti)) 1486 (if (> nexti end) 1487 (return (values nchars i)) 1488 (setq i nexti nchars (1+ nchars))))))) 1552 1489 :length-of-memory-encoding-function 1553 1490 (nfunction 1554 1491 native-utf-16-length-of-memory-encoding 1555 (lambda (pointer n units &optional (start 0))1492 (lambda (pointer noctets start) 1556 1493 (do* ((i start) 1557 (p (+ start start) (+ p 2)) 1558 (nchars 0 (1+ nchars))) 1559 ((>= i nunits) 1560 (if (= i nunits) nchars)) 1561 (let* ((code (%get-unsigned-word pointer p))) 1562 (declare (type (unsigned-byte 16) code)) 1563 (incf i 1564 (incf i 1565 (if (or (< code #xd800) 1566 (>= code #xe000)) 1567 1 1568 2))))))) 1494 (j (+ i 2) (+ i 2)) 1495 (end (+ start noctets)) 1496 (nchars 0)) 1497 ((> j end) (values nchars i)) 1498 (let* ((code (%get-unsigned-word pointer i)) 1499 (nexti (+ i (if (or (< code #xd800) 1500 (>= code #xdc00)) 1501 2 1502 4)))) 1503 (declare (type (unsigned-byte 16) code) 1504 (fixnum nexti)) 1505 (if (> nexti end) 1506 (return (values nchars i)) 1507 (setq i nexti nchars (1+ nchars))))))) 1569 1508 :literal-char-code-limit #x10000 1570 1509 ) … … 1572 1511 ;;; utf-16, reversed byte order 1573 1512 (define-character-encoding #+big-endian-target :utf-16le #-big-endian-target :utf-16be 1574 #+little-endian-target1575 "A 16-bit, variable-length encoding in which characters with1513 #+little-endian-target 1514 "A 16-bit, variable-length encoding in which characters with 1576 1515 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1577 1516 big-endian word and characters with larger codes can be encoded in a … … 1579 1518 is implicit in the encoding; byte-order-mark characters are not 1580 1519 interpreted on input or prepended to output." 1581 #+big-endian-target1582 "A 16-bit, variable-length encoding in which characters with1520 #+big-endian-target 1521 "A 16-bit, variable-length encoding in which characters with 1583 1522 CHAR-CODEs less than #x10000 can be encoded in a single 16-bit 1584 1523 little-endian word and characters with larger codes can be encoded in … … 1586 1525 data is implicit in the encoding; byte-order-mark characters are not 1587 1526 interpreted on input or prepended to output." 1588 :max-units-per-char 2 1589 :code-unit-size 16 1590 :native-endianness nil 1591 :stream-encode-function 1592 #'utf-16-stream-encode 1593 :stream-decode-function 1594 #'utf-16-stream-decode 1595 :vector-encode-function 1596 (nfunction 1597 reversed-utf-16-vector-encode 1598 (lambda (string vector idx &optional (start 0) (end (length string))) 1599 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1600 (fixnum idx)) 1601 (do* ((i start (1+ i))) 1602 ((>= i end) idx) 1603 (let* ((char (schar string i)) 1604 (code (char-code char)) 1605 (highbits (- code #x10000))) 1606 (declare (type (mod #x110000) code) 1607 (fixnum highbits)) 1608 (cond ((< highbits 0) 1609 (setf (aref vector idx) (%swap-u16 code)) 1610 (incf idx)) 1611 (t 1612 (setf (aref vector idx) (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10))))) 1613 (incf idx) 1614 (setf (aref vector idx) (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1615 (incf idx))))))) 1616 :vector-decode-function 1617 (nfunction 1618 reversed-utf-16-vector-decode 1619 (lambda (vector idx nunits string) 1620 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1621 (type index idx)) 1622 (do* ((i 0 (1+ i)) 1623 (len (length vector)) 1624 (index idx (1+ index))) 1625 ((>= i nunits) (values string index)) 1626 (declare (fixnum i len index)) 1627 (if (>= index len) 1628 (values nil idx) 1629 (let* ((1st-unit (%swap-u16 (aref vector index)))) 1630 (declare (type (unsigned-byte 16) 1st-unit)) 1631 (let* ((char 1632 (if (or (< 1st-unit #xd800) 1633 (>= 1st-unit #xe000)) 1634 (code-char 1st-unit) 1635 (if (< 1st-unit #xdc00) 1636 (let* ((2nd-unit (%swap-u16 (aref vector (incf index))))) 1637 (declare (type (unsigned-byte 16) 2nd-unit)) 1638 (if (and (>= 2nd-unit #xdc00) 1639 (< 2nd-unit #xe000)) 1640 (code-char (the (unsigned-byte 21) 1641 (logior 1642 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1643 (- 1st-unit #xd800)) 1644 10)) 1645 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1646 (if char 1647 (setf (schar string i) char) 1648 (return (values nil idx))))))))) 1649 :memory-encode-function 1650 (nfunction 1651 reversed-utf-16-memory-encode 1652 (lambda (string pointer idx &optional (start 0) (end (length string))) 1653 (declare (fixnum idx)) 1654 (do* ((i start (1+ i))) 1655 ((>= i end) idx) 1656 (let* ((code (char-code (schar string i))) 1657 (highbits (- code #x10000)) 1658 (p (+ idx idx))) 1659 (declare (type (mod #x110000) code) 1660 (fixnum p highbits)) 1527 :max-units-per-char 2 1528 :code-unit-size 16 1529 :native-endianness nil 1530 :stream-encode-function 1531 #'utf-16-stream-encode 1532 :stream-decode-function 1533 #'utf-16-stream-decode 1534 :vector-encode-function 1535 (nfunction 1536 reversed-utf-16-vector-encode 1537 (lambda (string vector idx start end) 1538 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1539 (fixnum idx start end)) 1540 (do* ((i start (1+ i))) 1541 ((>= i end) idx) 1542 (declare (fixnum i)) 1543 (let* ((char (schar string i)) 1544 (code (char-code char)) 1545 (highbits (- code #x10000))) 1546 (declare (type (mod #x110000) code) 1547 (fixnum highbits)) 1661 1548 (cond ((< highbits 0) 1662 (setf (%get-unsigned-word pointer p) (%swap-u16 code)) 1663 (incf idx) 1664 (incf p 2)) 1665 1549 (setf (%reversed-u8-ref-u16 vector idx) code) 1550 (incf idx 2)) 1666 1551 (t 1667 (setf (%get-unsigned-word pointer p) (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10))))) 1668 (incf idx) 1669 (incf p 2) 1670 (setf (%get-unsigned-word pointer p) (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1671 (incf idx) 1672 (incf p 2))))))) 1673 :memory-decode-function 1674 (nfunction 1675 reversed-utf-16-memory-decode 1676 (lambda (pointer nunits idx string) 1677 (declare (fixnum nunits idx)) 1678 (do* ((i 0 (1+ i)) 1679 (index idx (1+ index)) 1680 (p (+ index index) (+ p 2))) 1681 ((>= i nunits) (values string index)) 1682 (declare (fixnum i index p)) 1683 (let* ((1st-unit (%swap-u16 (%get-unsigned-word pointer p)))) 1684 (declare (type (unsigned-byte 16) 1st-unit)) 1685 (let* ((char 1686 (if (or (< 1st-unit #xd800) 1687 (>= 1st-unit #xe000)) 1688 (code-char 1st-unit) 1689 (if (< 1st-unit #xdc00) 1690 (let* ((2nd-unit (%swap-u16 (%get-unsigned-byte pointer (incf p 2))))) 1691 (declare (type (unsigned-byte 16) 2nd-unit)) 1692 (incf index) 1693 (if (and (>= 2nd-unit #xdc00) 1694 (< 2nd-unit #xe000)) 1695 (code-char (the (unsigned-byte 21) 1696 (logior 1697 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1698 (- 1st-unit #xd800)) 1699 10)) 1700 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1701 (if char 1702 (setf (schar string i) char) 1703 (return (values nil idx)))))))) 1704 :units-in-string-function 1705 #'utf-16-units-in-string 1706 :length-of-vector-encoding-function 1707 (nfunction 1708 reversed-utf-16-length-of-vector-encoding 1709 (lambda (vector &optional (start 0) (end (length vector))) 1710 (declare (type (simple-array (unsigned-byte 16) (*)) vector)) 1711 (do* ((i start) 1712 (nchars 0 (1+ nchars))) 1713 ((>= i end) 1714 (if (= i end) nchars)) 1715 (let* ((code (%swap-u16 (aref vector i)))) 1716 (declare (type (unsigned-byte 16) code)) 1717 (incf i 1718 (if (or (< code #xd800) 1719 (>= code #xe000)) 1720 1 1721 2)))))) 1722 :length-of-memory-encoding-function 1723 (nfunction 1724 reversed-utf-16-length-of-memory-encoding 1725 (lambda (pointer nunits &optional (start 0)) 1726 (do* ((i start) 1727 (p (+ start start) (+ p 2)) 1728 (nchars 0 (1+ nchars))) 1729 ((>= i nunits) 1730 (if (= i nunits) nchars)) 1731 (let* ((code (%swap-u16 (%get-unsigned-word pointer p)))) 1732 (declare (type (unsigned-byte 8) code)) 1733 (incf i 1734 (incf i 1735 (if (or (< code #xd800) 1736 (>= code #xe000)) 1737 1 1738 2))))))) 1739 :literal-char-code-limit #x10000 1740 ) 1552 (let* ((firstword (logior #xd800 (the fixnum (ash highbits -10)))) 1553 (secondword (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1554 (declare (type (unsigned-byte 16) firstword secondword)) 1555 (setf (%reversed-u8-ref-u16 vector idx) firstword 1556 (%reversed-u8-ref-u16 vector (the fixnum (+ idx 2))) secondword) 1557 (incf idx 4)))))))) 1558 :vector-decode-function 1559 (nfunction 1560 reversed-utf-16-vector-decode 1561 (lambda (vector idx noctets string) 1562 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1563 (type index idx)) 1564 (do* ((i 0 (1+ i)) 1565 (end (+ idx noctets)) 1566 (index idx)) 1567 ((= index end) index) 1568 (declare (fixnum i len index)) 1569 (let* ((1st-unit (%reversed-u8-ref-u16 vector index))) 1570 (declare (type (unsigned-byte 16) 1st-unit)) 1571 (incf index 2) 1572 (let* ((char 1573 (if (or (< 1st-unit #xd800) 1574 (>= 1st-unit #xe000)) 1575 (code-char 1st-unit) 1576 (if (< 1st-unit #xdc00) 1577 (let* ((2nd-unit (%reversed-u8-ref-u16 vector index))) 1578 (declare (type (unsigned-byte 16) 2nd-unit)) 1579 (incf index 2) 1580 (if (and (>= 2nd-unit #xdc00) 1581 (< 2nd-unit #xe000)) 1582 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit))))))) 1583 (setf (schar string i) (or char #\Replacement_Character))))))) 1584 :memory-encode-function 1585 (nfunction 1586 reversed-utf-16-memory-encode 1587 (lambda (string pointer idx start end) 1588 (declare (fixnum idx)) 1589 (do* ((i start (1+ i))) 1590 ((>= i end) idx) 1591 (let* ((code (char-code (schar string i))) 1592 (highbits (- code #x10000))) 1593 (declare (type (mod #x110000) code) 1594 (fixnum highbits)) 1595 (cond ((< highbits 0) 1596 (setf (%get-unsigned-word pointer idx) (%swap-u16 code)) 1597 (incf idx 2)) 1598 (t 1599 (setf (%get-unsigned-word pointer idx) (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10))))) 1600 (incf idx 2) 1601 (setf (%get-unsigned-word pointer idx) (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1602 (incf idx 2))))))) 1603 :memory-decode-function 1604 (nfunction 1605 reversed-utf-16-memory-decode 1606 (lambda (pointer noctets idx string) 1607 (declare (fixnum noctets idx)) 1608 (do* ((i 0 (1+ i)) 1609 (end (+ idx noctets)) 1610 (index idx)) 1611 ((>= index end) index) 1612 (declare (fixnum i index p)) 1613 (let* ((1st-unit (%swap-u16 (%get-unsigned-word pointer index)))) 1614 (declare (type (unsigned-byte 16) 1st-unit)) 1615 (incf index 2) 1616 (let* ((char 1617 (if (or (< 1st-unit #xd800) 1618 (>= 1st-unit #xe000)) 1619 (code-char 1st-unit) 1620 (if (< 1st-unit #xdc00) 1621 (let* ((2nd-unit (%swap-u16 (%get-unsigned-word pointer index)))) 1622 (declare (type (unsigned-byte 16) 2nd-unit)) 1623 (incf index) 1624 (if (and (>= 2nd-unit #xdc00) 1625 (< 2nd-unit #xe000)) 1626 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit))))))) 1627 (setf (schar string i) (or char #\Replacement_Character))))))) 1628 :octets-in-string-function 1629 #'utf-16-octets-in-string 1630 :length-of-vector-encoding-function 1631 (nfunction 1632 reversed-utf-16-length-of-vector-encoding 1633 (lambda (vector start end) 1634 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 1635 (declare (fixnum start end)) 1636 (do* ((i start) 1637 (j (+ 2 i) (+ 2 i)) 1638 (nchars 0)) 1639 ((> j end) (values nchars i)) 1640 (declare (fixnum i j nchars)) 1641 (let* ((code (%reversed-u8-ref-u16 vector i)) 1642 (nexti (+ i (if (or (< code #xd800) 1643 (>= code #xdc00)) 1644 2 1645 4)))) 1646 (declare (type (unsigned-byte 16) code) 1647 (fixnum nexti)) 1648 (if (> nexti end) 1649 (return (values nchars i)) 1650 (setq i nexti nchars (1+ nchars))))))) 1651 :length-of-memory-encoding-function 1652 (nfunction 1653 reversed-utf-16-length-of-memory-encoding 1654 (lambda (pointer noctets start) 1655 (do* ((i start) 1656 (j (+ i 2) (+ i 2)) 1657 (end (+ start noctets)) 1658 (nchars 0)) 1659 ((> j end) (values nchars i)) 1660 (let* ((code (%swap-u16 (%get-unsigned-word pointer i))) 1661 (nexti (+ i (if (or (< code #xd800) 1662 (>= code #xdc00)) 1663 2 1664 4)))) 1665 (declare (type (unsigned-byte 16) code) 1666 (fixnum nexti)) 1667 (if (> nexti end) 1668 (return (values nchars i)) 1669 (setq i nexti nchars (1+ nchars))))))) 1670 :literal-char-code-limit #x10000 1671 ) 1741 1672 1742 1673 ;;; UTF-16. Memory and vector functions determine endianness of … … 1771 1702 (nfunction 1772 1703 utf-16-vector-encode 1773 (lambda (string vector idx &optional (start 0) (end (length string)))1704 (lambda (string vector idx start end) 1774 1705 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1775 1706 (fixnum idx)) 1776 1707 (when (> end start) 1777 (setf (aref vector idx) byte-order-mark-char-code) 1778 (incf idx)) 1779 (do* ((i start (1+ i))) 1780 ((>= i end) idx) 1781 (let* ((char (schar string i)) 1782 (code (char-code char)) 1783 (highbits (- code #x10000))) 1784 (declare (type (mod #x110000) code) 1785 (fixnum highbits)) 1786 (cond ((< highbits 0) 1787 (setf (aref vector idx) code) 1788 (incf idx)) 1789 (t 1790 (setf (aref vector idx) (logior #xd800 (the fixnum (ash highbits -10)))) 1791 (incf idx) 1792 (setf (aref vector idx) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1793 (incf idx))))))) 1708 (setf (%native-u8-ref-u16 vector idx) byte-order-mark-char-code) 1709 (incf idx 2)) 1710 (do* ((i start (1+ i))) 1711 ((>= i end) idx) 1712 (declare (fixnum i)) 1713 (let* ((char (schar string i)) 1714 (code (char-code char)) 1715 (highbits (- code #x10000))) 1716 (declare (type (mod #x110000) code) 1717 (fixnum highbits)) 1718 (cond ((< highbits 0) 1719 (setf (%native-u8-ref-u16 vector idx) code) 1720 (incf idx 2)) 1721 (t 1722 (let* ((firstword (logior #xd800 (the fixnum (ash highbits -10)))) 1723 (secondword (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1724 (declare (type (unsigned-byte 16) firstword secondword)) 1725 (setf (%native-u8-ref-u16 vector idx) firstword 1726 (%native-u8-ref-u16 vector (the fixnum (+ idx 2))) secondword) 1727 (incf idx 4)))))))) 1794 1728 :vector-decode-function 1795 1729 (nfunction 1796 1730 utf-16-vector-decode 1797 (lambda (vector idx n units string)1731 (lambda (vector idx noctets string) 1798 1732 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1799 1733 (type index idx)) 1800 (let* ((len (length vector)) 1801 (swap (if (> len idx) 1802 (case (aref vector idx) 1734 (let* ((swap (if (>= noctets 2) 1735 (case (%native-u8-ref-u16 vector idx) 1803 1736 (#.byte-order-mark-char-code 1804 (incf idx ) nil)1737 (incf idx 2) nil) 1805 1738 (#.swapped-byte-order-mark-char-code 1806 (incf idx t))1739 (incf idx 2) t) 1807 1740 (t #+little-endian-target t))))) 1808 1809 1741 (do* ((i 0 (1+ i)) 1810 (index idx (1+ index))) 1811 ((>= i nunits) (values string index)) 1742 (end (+ idx noctets)) 1743 (index idx)) 1744 ((= index end) index) 1812 1745 (declare (fixnum i len index)) 1813 (if (>= index len) 1814 (values nil idx) 1815 (let* ((1st-unit (aref vector index))) 1816 (declare (type (unsigned-byte 16) 1st-unit)) 1817 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 1818 (let* ((char 1819 (if (or (< 1st-unit #xd800) 1820 (>= 1st-unit #xe000)) 1821 (code-char 1st-unit) 1822 (if (< 1st-unit #xdc00) 1823 (let* ((2nd-unit (aref vector (incf index)))) 1824 (declare (type (unsigned-byte 16) 2nd-unit)) 1825 (if swap (setq 2nd-unit (%swap-u16 2nd-unit))) 1826 (if (and (>= 2nd-unit #xdc00) 1827 (< 2nd-unit #xe000)) 1828 (code-char (the (unsigned-byte 21) 1829 (logior 1830 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1831 (- 1st-unit #xd800)) 1832 10)) 1833 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1834 (if char 1835 (setf (schar string i) char) 1836 (return (values nil idx)))))))))) 1746 (let* ((1st-unit (if swap 1747 (%reversed-u8-ref-u16 vector index) 1748 (%native-u8-ref-u16 vector index)))) 1749 (declare (type (unsigned-byte 16) 1st-unit)) 1750 (incf index 2) 1751 (let* ((char 1752 (if (or (< 1st-unit #xd800) 1753 (>= 1st-unit #xe000)) 1754 (code-char 1st-unit) 1755 (if (< 1st-unit #xdc00) 1756 (let* ((2nd-unit (if swap 1757 (%reversed-u8-ref-u16 vector index) 1758 (%native-u8-ref-u16 vector index)))) 1759 (declare (type (unsigned-byte 16) 2nd-unit)) 1760 (incf index 2) 1761 (if (and (>= 2nd-unit #xdc00) 1762 (< 2nd-unit #xe000)) 1763 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit))))))) 1764 (setf (schar string i) (or char #\Replacement_Character)))))))) 1837 1765 :memory-encode-function 1838 1766 (nfunction 1839 1767 utf-16-memory-encode 1840 (lambda (string pointer idx &optional (start 0) (end (length string)))1768 (lambda (string pointer idx start end) 1841 1769 (declare (fixnum idx)) 1842 1770 (when (> end start) 1843 (setf (%get-unsigned-word pointer (+ idx idx))1771 (setf (%get-unsigned-word pointer idx) 1844 1772 byte-order-mark-char-code) 1845 (incf idx ))1773 (incf idx 2)) 1846 1774 (do* ((i start (1+ i))) 1847 1775 ((>= i end) idx) 1848 1776 (let* ((code (char-code (schar string i))) 1849 (highbits (- code #x10000)) 1850 (p (+ idx idx))) 1777 (highbits (- code #x10000))) 1851 1778 (declare (type (mod #x110000) code) 1852 1779 (fixnum p highbits)) 1853 1780 (cond ((< highbits 0) 1854 (setf (%get-unsigned-word pointer p) code) 1855 (incf idx) 1856 (incf p 2)) 1857 1781 (setf (%get-unsigned-word pointer idx) code) 1782 (incf idx 2)) 1858 1783 (t 1859 (setf (%get-unsigned-word pointer p) (logior #xd800 (the fixnum (ash highbits -10)))) 1860 (incf idx) 1861 (incf p 2) 1862 (setf (%get-unsigned-word pointer p) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1863 (incf idx) 1864 (incf p 2))))))) 1784 (setf (%get-unsigned-word pointer idx) (logior #xd800 (the fixnum (ash highbits -10)))) 1785 1786 (setf (%get-unsigned-word pointer (the fixnum (+ idx 2))) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1787 (incf idx 4))))))) 1865 1788 :memory-decode-function 1866 1789 (nfunction 1867 1790 utf-16-memory-decode 1868 (lambda (pointer n units idx string)1791 (lambda (pointer noctets idx string) 1869 1792 (declare (fixnum nunits idx)) 1870 (let* ((swap (when (> n units 0)1871 (case (%get-unsigned-word pointer (+ idx idx))1793 (let* ((swap (when (> noctets 1) 1794 (case (%get-unsigned-word pointer idx) 1872 1795 (#.byte-order-mark-char-code 1873 (incf idx )1874 (decf n units)1796 (incf idx 2) 1797 (decf noctets 2) 1875 1798 nil) 1876 1799 (#.swapped-byte-order-mark-char-code 1877 (incf idx )1878 (decf n units)1800 (incf idx 2) 1801 (decf noctets 2) 1879 1802 t) 1880 1803 (t #+little-endian-target t))))) 1881 1804 (do* ((i 0 (1+ i)) 1882 ( index idx (1+ index))1883 ( p (+ index index) (+ p 2)))1884 ((>= i nunits) (values string index))1805 (end (+ idx noctets)) 1806 (index idx )) 1807 ((>= index end) index) 1885 1808 (declare (fixnum i index p)) 1886 (let* ((1st-unit (%get-unsigned-word pointer p)))1809 (let* ((1st-unit (%get-unsigned-word pointer index))) 1887 1810 (declare (type (unsigned-byte 16) 1st-unit)) 1811 (incf index 2) 1888 1812 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 1889 1813 (let* ((char … … 1892 1816 (code-char 1st-unit) 1893 1817 (if (< 1st-unit #xdc00) 1894 (let* ((2nd-unit (%get-unsigned-byte pointer (incf p 2))))1818 (let* ((2nd-unit (%get-unsigned-byte pointer index))) 1895 1819 (declare (type (unsigned-byte 16) 2nd-unit)) 1896 1820 (if swap (setq 2nd-unit (%swap-u16 2nd-unit))) 1897 (incf index )1821 (incf index 2) 1898 1822 (if (and (>= 2nd-unit #xdc00) 1899 1823 (< 2nd-unit #xe000)) 1900 (code-char (the (unsigned-byte 21) 1901 (logior 1902 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1903 (- 1st-unit #xd800)) 1904 10)) 1905 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1906 (if char 1907 (setf (schar string i) char) 1908 (return (values nil idx))))))))) 1909 :units-in-string-function 1824 (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit))))))) 1825 (setf (schar string i) (or char #\Replacement_Character)))))))) 1826 :octets-in-string-function 1910 1827 #'(lambda (&rest args) 1911 1828 (declare (dynamic-extent args)) 1912 ;; Add onefor the BOM.1913 ( 1+ (apply #'utf-16-units-in-string args)))1829 ;; Add two for the BOM. 1830 (+ 2 (apply #'utf-16-octets-in-string args))) 1914 1831 :length-of-vector-encoding-function 1915 1832 (nfunction 1916 1833 utf-16-length-of-vector-encoding 1917 (lambda (vector &optional (start 0) (end (length vector)))1834 (lambda (vector start end) 1918 1835 (declare (type (simple-array (unsigned-byte 16) (*)) vector)) 1919 1836 (let* ((swap (when (> end start) 1920 (case ( arefvector start)1837 (case (%native-u8-ref-u16 vector start) 1921 1838 (#.byte-order-mark-char-code 1922 (incf start )1839 (incf start 2) 1923 1840 nil) 1924 1841 (#.swapped-byte-order-mark-char-code 1925 (incf start) 1842 (incf start 2) 1843 t) 1844 (t #+little-endian-target t))))) 1845 (do* ((i start) 1846 (j (+ 2 i) (+ 2 i)) 1847 (nchars 0)) 1848 ((> j end) 1849 (if (= i end) (values nchars i)) 1850 (let* ((code (if swap 1851 (%reversed-u8-ref-u16 vector i) 1852 (%native-u8-ref-u16 vector i))) 1853 (nexti (+ i (if (or (< code #xd800) 1854 (>= code #xdc00)) 1855 2 1856 4)))) 1857 (declare (type (unsigned-byte 16) code) 1858 (fixnum nexti)) 1859 (if (> nexti end) 1860 (return (values nchars i)) 1861 (setq i nexti nchars (1+ nchars))))))))) 1862 :length-of-memory-encoding-function 1863 (nfunction 1864 utf-16-length-of-memory-encoding 1865 (lambda (pointer noctets start) 1866 (let* ((swap (when (>= noctets 2) 1867 (case (%get-unsigned-word pointer (+ start start)) 1868 (#.byte-order-mark-char-code 1869 (incf start 2) 1870 (decf noctets 2) 1871 nil) 1872 (#.swapped-byte-order-mark-char-code 1873 (incf start 2) 1874 (decf noctets 2) 1926 1875 t) 1927 1876 (t #+little-endian-target t))))) 1928 1877 (do* ((i start) 1929 1878 (nchars 0 (1+ nchars))) 1930 ((>= i end)1931 (if (= i end) nchars))1932 (let* ((code ( aref vector i)))1879 ((>= i noctets) 1880 (if (= i noctets) nchars)) 1881 (let* ((code (%get-unsigned-word pointer i))) 1933 1882 (declare (type (unsigned-byte 16) code)) 1934 1883 (if swap (setq code (%swap-u16 code))) 1935 1884 (incf i 1936 1885 (if (or (< code #xd800) 1937 (>= code #xe000)) 1938 1 1939 2))))))) 1940 :length-of-memory-encoding-function 1941 (nfunction 1942 utf-16-length-of-memory-encoding 1943 (lambda (pointer nunits &optional (start 0)) 1944 (let* ((swap (when (> nunits 1) 1945 (case (%get-unsigned-word pointer (+ start start)) 1946 (#.byte-order-mark-char-code 1947 (incf start) 1948 (decf nunits) 1949 nil) 1950 (#.swapped-byte-order-mark-char-code 1951 (incf start) 1952 (decf nunits) 1953 t) 1954 (t #+little-endian-target t))))) 1955 (do* ((i start) 1956 (p (+ start start) (+ p 2)) 1957 (nchars 0 (1+ nchars))) 1958 ((>= i nunits) 1959 (if (= i nunits) nchars)) 1960 (let* ((code (%get-unsigned-word pointer p))) 1961 (declare (type (unsigned-byte 16) code)) 1962 (if swap (setq code (%swap-u16 code))) 1963 (incf i 1964 (incf i 1965 (if (or (< code #xd800) 1966 (>= code #xe000)) 1967 1 1968 2)))))))) 1886 (>= code #xdc00)) 1887 2 1888 4))))))) 1969 1889 :literal-char-code-limit #x10000 1970 1890 :use-byte-order-mark … … 1990 1910 1991 1911 1992 (defun ucs-2-units-in-string (string &optional (start 0) (end (length string))) 1993 (when (>= end start) 1994 (do* ((i start (1+ i))) 1995 ((= i end) (- end start)) 1996 (let* ((code (char-code (schar string i)))) 1997 (declare (type (mod #x110000) code)) 1998 (unless (< code #x10000) (return nil)))))) 1912 (defun ucs-2-octets-in-string (string start end) 1913 (declare (ignore string)) 1914 (if (>= end start) 1915 (* 2 (- end start)) 1916 0)) 1917 1999 1918 2000 1919 ;;; UCS-2, native byte order … … 2022 1941 (nfunction 2023 1942 native-ucs-2-vector-encode 2024 (lambda (string vector idx &optional (start 0) (end (length string)))2025 (declare (type (simple-array (unsigned-byte 16) (*)) vector)1943 (lambda (string vector idx start end) 1944 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2026 1945 (fixnum idx)) 2027 1946 (do* ((i start (1+ i))) … … 2029 1948 (let* ((char (schar string i)) 2030 1949 (code (char-code char))) 2031 (declare (type (mod #x110000) code) 2032 (fixnum highbits)) 2033 (cond ((< code #x10000) 2034 (setf (aref vector idx) code) 2035 (incf idx)) 2036 (t (return nil))))))) 1950 (declare (type (mod #x110000) code)) 1951 (when (>= code #x10000) 1952 (setq code (char-code #\Replacement_Character))) 1953 (setf (%native-u8-ref-u16 vector idx) code) 1954 (incf idx 2))))) 2037 1955 :vector-decode-function 2038 1956 (nfunction 2039 1957 native-ucs-2-vector-decode 2040 (lambda (vector idx n units string)2041 (declare (type (simple-array (unsigned-byte 16) (*)) vector)1958 (lambda (vector idx noctets string) 1959 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2042 1960 (type index idx)) 2043 1961 (do* ((i 0 (1+ i)) 2044 ( len (length vector))2045 (index idx ( 1+index)))2046 ((>= i nunits) (values string index))1962 (end (+ idx noctets)) 1963 (index idx (+ 2 index))) 1964 ((>= index end) index) 2047 1965 (declare (fixnum i len index)) 2048 (if (>= index len) 2049 (values nil idx) 2050 (let* ((char (code-char (the (unsigned-byte 16) (aref vector index))))) 2051 (if char 2052 (setf (schar string i) char) 2053 (return (values nil idx)))))))) 1966 (setf (schar string i) 1967 (or (code-char (%native-u8-ref-u16 vector index)) 1968 #\Replacement_Character))))) 2054 1969 :memory-encode-function 2055 1970 (nfunction 2056 1971 native-ucs-2-memory-encode 2057 (lambda (string pointer idx &optional (start 0) (end (length string)))1972 (lambda (string pointer idx start end) 2058 1973 (declare (fixnum idx)) 2059 1974 (do* ((i start (1+ i))) 2060 1975 ((>= i end) idx) 2061 (let* ((code (char-code (schar string i))) 2062 (p (+ idx idx))) 2063 (declare (type (mod #x110000) code) 2064 (fixnum p highbits)) 2065 (cond ((< code #x10000) 2066 (setf (%get-unsigned-word pointer p) code) 2067 (incf idx) 2068 (incf p 2)) 2069 (t 2070 (return nil))))))) 1976 (let* ((code (char-code (schar string i)))) 1977 (declare (type (mod #x110000) code)) 1978 (setf (%get-unsigned-word pointer idx) 1979 (if (>= code #x10000) 1980 (char-code #\Replacement_Character) 1981 code)) 1982 (incf idx 2))))) 2071 1983 :memory-decode-function 2072 1984 (nfunction 2073 1985 native-ucs-2-memory-decode 2074 (lambda (pointer n units idx string)2075 (declare (fixnum n units idx))1986 (lambda (pointer noctets idx string) 1987 (declare (fixnum noctets idx)) 2076 1988 (do* ((i 0 (1+ i)) 2077 (index idx (1+ index)) 2078 (p (+ index index) (+ p 2))) 2079 ((>= i nunits) (values string index)) 2080 (declare (fixnum i index p)) 2081 (let* ((1st-unit (%get-unsigned-word pointer p))) 1989 (index idx (+ index 2))) 1990 ((>= i noctets) index) 1991 (declare (fixnum i index)) 1992 (let* ((1st-unit (%get-unsigned-word pointer index))) 2082 1993 (declare (type (unsigned-byte 16) 1st-unit)) 2083 (let* ((char (code-char 1st-unit))) 2084 (setf (schar string i) char) 2085 (return (values nil idx))))))) 2086 :units-in-string-function 2087 #'ucs-2-units-in-string 1994 (setf (schar string i) (or (char-code 1st-unit) #\Replacement_Character)))))) 1995 :octets-in-string-function 1996 #'ucs-2-octets-in-string 2088 1997 :length-of-vector-encoding-function 2089 1998 (nfunction 2090 1999 native-ucs-2-length-of-vector-encoding 2091 (lambda (vector &optional (start 0) (end (length vector)))2092 (d o* ((i start (1+ i)))2093 ((>= i end) (if (= i end) (- end start)))2094 (let* ((code (aref vector i)))2095 (unless (code-char code)2096 (return nil))))))2000 (lambda (vector start end) 2001 (declare (ignore vector)) 2002 (do* ((i start (1+ i)) 2003 (j (+ i 2) (+ i 2)) 2004 (nchars 0 (1+ nchars))) 2005 ((> j end) (values nchars i))))) 2097 2006 :length-of-memory-encoding-function 2098 2007 (nfunction 2099 2008 native-ucs-2-length-of-memory-encoding 2100 (lambda (pointer n units &optional (start 0))2101 (declare (ignore pointer start))2102 nunits))2009 (lambda (pointer noctets start) 2010 (declare (ignore pointer)) 2011 (values (floor noctets 2) (+ start noctets)))) 2103 2012 :literal-char-code-limit #x10000 2104 2013 ) … … 2128 2037 (nfunction 2129 2038 reversed-ucs-2-vector-encode 2130 (lambda (string vector idx &optional (start 0) (end (length string)))2131 (declare (type (simple-array (unsigned-byte 16) (*)) vector)2039 (lambda (string vector idx start end) 2040 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2132 2041 (fixnum idx)) 2133 2042 (do* ((i start (1+ i))) … … 2135 2044 (let* ((char (schar string i)) 2136 2045 (code (char-code char))) 2137 (declare (type (mod #x110000) code) 2138 (fixnum highbits)) 2139 (cond ((< code #x10000) 2140 (setf (aref vector idx) (%swap-u16 code)) 2141 (incf idx)) 2142 (t (return nil))))))) 2046 (declare (type (mod #x110000) code)) 2047 (when (>= code #x10000) 2048 (setq code (char-code #\Replacement_Character))) 2049 (setf (%reversed-u8-ref-u16 vector idx) code) 2050 (incf idx 2))))) 2143 2051 :vector-decode-function 2144 2052 (nfunction 2145 2053 reversed-ucs-2-vector-decode 2146 (lambda (vector idx n units string)2147 (declare (type (simple-array (unsigned-byte 16) (*)) vector)2054 (lambda (vector idx noctets string) 2055 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2148 2056 (type index idx)) 2149 2057 (do* ((i 0 (1+ i)) 2150 ( len (length vector))2151 (index idx ( 1+index)))2152 ((>= i nunits) (values string index))2058 (end (+ idx noctets)) 2059 (index idx (+ 2 index))) 2060 ((>= index end) index) 2153 2061 (declare (fixnum i len index)) 2154 (if (>= index len) 2155 (values nil idx) 2156 (let* ((char (code-char (the (unsigned-byte 16) (%swap-u16 (aref vector index)))))) 2157 (if char 2158 (setf (schar string i) char) 2159 (return (values nil idx)))))))) 2062 (setf (schar string i) 2063 (or (code-char (%reversed-u8-ref-u16 vector index)) 2064 #\Replacement_Character))))) 2160 2065 :memory-encode-function 2161 2066 (nfunction 2162 2067 reversed-ucs-2-memory-encode 2163 (lambda (string pointer idx &optional (start 0) (end (length string)))2068 (lambda (string pointer idx start end) 2164 2069 (declare (fixnum idx)) 2165 2070 (do* ((i start (1+ i))) 2166 2071 ((>= i end) idx) 2167 (let* ((code (char-code (schar string i))) 2168 (p (+ idx idx))) 2169 (declare (type (mod #x110000) code) 2170 (fixnum p highbits)) 2171 (cond ((< code #x10000) 2172 (setf (%get-unsigned-word pointer p) (%swap-u16 code)) 2173 (incf idx) 2174 (incf p 2)) 2175 (t 2176 (return nil))))))) 2072 (let* ((code (char-code (schar string i)))) 2073 (declare (type (mod #x110000) code)) 2074 (setf (%get-unsigned-word pointer idx) 2075 (if (>= code #x10000) 2076 (%swap-u16 (char-code #\Replacement_Character)) 2077 (%swap-u16 code))) 2078 (incf idx 2))))) 2177 2079 :memory-decode-function 2178 2080 (nfunction 2179 2081 reversed-ucs-2-memory-decode 2180 (lambda (pointer n units idx string)2181 (declare (fixnum n units idx))2082 (lambda (pointer noctets idx string) 2083 (declare (fixnum noctets idx)) 2182 2084 (do* ((i 0 (1+ i)) 2183 (index idx (1+ index)) 2184 (p (+ index index) (+ p 2))) 2185 ((>= i nunits) (values string index)) 2186 (declare (fixnum i index p)) 2187 (let* ((1st-unit (%swap-u16 (%get-unsigned-word pointer p)))) 2085 (index idx (+ index 2))) 2086 ((>= i noctets) index) 2087 (declare (fixnum i index)) 2088 (let* ((1st-unit (%swap-u16 (%get-unsigned-word pointer index)))) 2188 2089 (declare (type (unsigned-byte 16) 1st-unit)) 2189 (let* ((char (code-char 1st-unit))) 2190 (if char 2191 (setf (schar string i) char) 2192 (return (values nil idx)))))))) 2193 :units-in-string-function 2194 #'ucs-2-units-in-string 2090 (setf (schar string i) (or (char-code 1st-unit) #\Replacement_Character)))))) 2091 :octets-in-string-function 2092 #'ucs-2-octets-in-string 2195 2093 :length-of-vector-encoding-function 2196 2094 (nfunction 2197 2095 reversed-ucs-2-length-of-vector-encoding 2198 (lambda (vector &optional (start 0) (end (length vector))) 2199 (when (>= end start) 2200 (- end start)))) 2096 (lambda (vector start end) 2097 (declare (ignore vector)) 2098 (do* ((i start (1+ i)) 2099 (j (+ i 2) (+ i 2)) 2100 (nchars 0 (1+ nchars))) 2101 ((> j end) (values nchars i))))) 2201 2102 :length-of-memory-encoding-function 2202 2103 (nfunction 2203 2104 reversed-ucs-2-length-of-memory-encoding 2204 (lambda (pointer n units &optionalstart)2205 (declare (ignore pointer start))2206 nunits))2105 (lambda (pointer noctets start) 2106 (declare (ignore pointer)) 2107 (values (floor noctets 2) (+ start noctets)))) 2207 2108 :literal-char-code-limit #x10000 2208 2109 ) … … 2225 2126 (nfunction 2226 2127 ucs-2-vector-encode 2227 (lambda (string vector idx &optional (start 0) (end (length string)))2128 (lambda (string vector idx start end) 2228 2129 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2229 2130 (fixnum idx)) 2230 2131 (when (> end start) 2231 (setf ( arefvector idx) byte-order-mark-char-code)2232 (incf idx ))2132 (setf (%native-u8-ref-u16 vector idx) byte-order-mark-char-code) 2133 (incf idx 2)) 2233 2134 (do* ((i start (1+ i))) 2234 2135 ((>= i end) idx) 2235 2136 (let* ((char (schar string i)) 2236 2137 (code (char-code char))) 2237 (declare (type (mod #x110000) code) 2238 (fixnum highbits)) 2239 (cond ((< code #x10000) 2240 (setf (aref vector idx) code) 2241 (incf idx)) 2242 (t 2243 (return nil))))))) 2138 (declare (type (mod #x110000) code)) 2139 (when (>= code #x10000) 2140 (setq code (char-code #\Replacement_Character))) 2141 (setf (%native-u8-ref-u16 vector idx) code) 2142 (incf idx 2))))) 2244 2143 :vector-decode-function 2245 2144 (nfunction 2246 2145 ucs-2-vector-decode 2247 (lambda (vector idx n units string)2248 (declare (type (simple-array (unsigned-byte 16) (*)) vector)2249 (type index idx) )2250 (let* ((len (length vector))2251 (swap (if (> len idx)2252 (case ( arefvector idx)2146 (lambda (vector idx noctets string) 2147 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 2148 (type index idx) 2149 (fixnum noctets)) 2150 (let* ((swap (if (> noctets 1) 2151 (case (%native-u8-ref-u16 vector idx) 2253 2152 (#.byte-order-mark-char-code 2254 (incf idx ) nil)2153 (incf idx 2) (decf noctets 2) nil) 2255 2154 (#.swapped-byte-order-mark-char-code 2256 (incf idx t))2257 (t #+little-endian-target t)))))2155 (incf idx 2) (decf noctets 2) t) 2156 (t #+little-endian-target t))))) 2258 2157 2259 2158 (do* ((i 0 (1+ i)) 2159 (end (+ idx noctets)) 2260 2160 (index idx (1+ index))) 2261 ((>= i nunits) (values string index))2161 ((>= index end) index) 2262 2162 (declare (fixnum i len index)) 2263 ( if (>= index len)2264 (values nil idx)2265 (let* ((1st-unit (aref vector index)))2163 (let* ((1st-unit (if swap 2164 (%reversed-u8-ref-u16 vector index) 2165 (%native-u8-ref-u16 vector index)))) 2266 2166 (declare (type (unsigned-byte 16) 1st-unit)) 2267 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 2268 (let* ((char (code-char 1st-unit))) 2269 (if char 2270 (setf (schar string i) char) 2271 (return (values nil idx)))))))))) 2167 (setf (schar string i) (or (code-char 1st-unit) #\Replacement_Character))))))) 2272 2168 :memory-encode-function 2273 2169 (nfunction 2274 2170 ucs-2-memory-encode 2275 (lambda (string pointer idx &optional (start 0) (end (length string)))2171 (lambda (string pointer idx start end) 2276 2172 (declare (fixnum idx)) 2277 2173 (when (> end start) 2278 (setf (%get-unsigned-word pointer (+ idx idx))2174 (setf (%get-unsigned-word pointer idx) 2279 2175 byte-order-mark-char-code) 2280 (incf idx ))2176 (incf idx 2)) 2281 2177 (do* ((i start (1+ i))) 2282 2178 ((>= i end) idx) 2283 (let* ((code (char-code (schar string i))) 2284 (p (+ idx idx))) 2285 (declare (type (mod #x110000) code) 2286 (fixnum p)) 2287 (cond ((< code #x10000) 2288 (setf (%get-unsigned-word pointer p) code) 2289 (incf idx) 2290 (incf p 2)) 2291 (t 2292 (return (values nil idx)))))))) 2179 (let* ((code (char-code (schar string i)))) 2180 (declare (type (mod #x110000) code)) 2181 (setf (%get-unsigned-word pointer idx) 2182 (if (>= code #x10000) 2183 (char-code #\Replacement_Character) 2184 code)) 2185 (incf idx 2))))) 2293 2186 :memory-decode-function 2294 2187 (nfunction 2295 2188 ucs-2-memory-decode 2296 (lambda (pointer n units idx string)2297 (declare (fixnum n units idx))2298 (let* ((swap (when (> n units 0)2299 (case (%get-unsigned-word pointer (+ idx idx))2189 (lambda (pointer noctets idx string) 2190 (declare (fixnum noctets idx)) 2191 (let* ((swap (when (> noctets 1) 2192 (case (%get-unsigned-word pointer idx) 2300 2193 (#.byte-order-mark-char-code 2301 (incf idx )2302 (decf n units)2194 (incf idx 2) 2195 (decf noctets 2) 2303 2196 nil) 2304 2197 (#.swapped-byte-order-mark-char-code 2305 (incf idx )2306 (decf n units)2198 (incf idx 2) 2199 (decf noctets 2) 2307 2200 t) 2308 2201 (t #+little-endian-target t))))) 2309 2202 (do* ((i 0 (1+ i)) 2310 (index idx (1+ index)) 2311 (p (+ index index) (+ p 2))) 2312 ((>= i nunits) (values string index)) 2313 (declare (fixnum i index p)) 2314 (let* ((1st-unit (%get-unsigned-word pointer p))) 2315 (declare (type (unsigned-byte 16) 1st-unit)) 2316 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 2317 (let* ((char (code-char 1st-unit))) 2318 (if char 2319 (setf (schar string i) char) 2320 (return (values nil idx))))))))) 2321 :units-in-string-function 2203 (index idx (+ index 2))) 2204 ((>= i noctets) index) 2205 (declare (fixnum i index)) 2206 (let* ((1st-unit (%get-unsigned-word pointer index))) 2207 (declare (type (unsigned-byte 16) 1st-unit)) 2208 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 2209 (setf (schar string i) (or (code-char 1st-unit) #\Replacement_Character))))))) 2210 :octets-in-string-function 2322 2211 #'(lambda (&rest args) 2323 2212 (declare (dynamic-extent args)) 2324 ;; Add onefor the BOM.2325 ( 1+ (apply #'ucs-2-units-in-string args)))2213 ;; Add two for the BOM. 2214 (+ 2 (apply #'ucs-2-octets-in-string args))) 2326 2215 :length-of-vector-encoding-function 2327 2216 (nfunction 2328 2217 ucs-2-length-of-vector-encoding 2329 (lambda (vector &optional (start 0) (end (length vector))) 2330 (declare (type (simple-array (unsigned-byte 16) (*)) vector)) 2331 (let* ((swap (when (> end start) 2332 (case (aref vector start) 2333 (#.byte-order-mark-char-code 2334 (incf start) 2335 nil) 2336 (#.swapped-byte-order-mark-char-code 2337 (incf start) 2338 t) 2339 (t #+little-endian-target t))))) 2340 (do* ((i start) 2341 (nchars 0 (1+ nchars))) 2342 ((>= i end) 2343 (if (= i end) nchars)) 2344 (let* ((code (aref vector i))) 2345 (declare (type (unsigned-byte 16) code)) 2346 (if swap (setq code (%swap-u16 code))) 2347 (incf i)))))) 2218 (lambda (vector start end) 2219 (declare (ignore vector)) 2220 (do* ((i start (1+ i)) 2221 (j (+ i 2) (+ i 2)) 2222 (nchars 0 (1+ nchars))) 2223 ((> j end) (values nchars i))))) 2348 2224 :length-of-memory-encoding-function 2349 2225 (nfunction 2350 2226 ucs-2-length-of-memory-encoding 2351 (lambda (pointer nunits &optional (start 0)) 2352 (when (> nunits 1) 2353 (case (%get-unsigned-word pointer (+ start start)) 2354 (#.byte-order-mark-char-code 2355 (incf start) 2356 (decf nunits) 2357 nil) 2358 (#.swapped-byte-order-mark-char-code 2359 (incf start) 2360 (decf nunits) 2361 t) 2362 (t #+little-endian-target t))) 2363 (do* ((i start (1+ i)) 2364 (p (+ start start) (+ p 2)) 2365 (nchars 0 (1+ nchars))) 2366 ((>= i nunits) 2367 (if (= i nunits) nchars))))) 2227 (lambda (pointer noctets start) 2228 (when (> noctets 1) 2229 (case (%get-unsigned-word pointer ) 2230 (#.byte-order-mark-char-code 2231 (incf start 2) 2232 (decf noctets 2)) 2233 (#.swapped-byte-order-mark-char-code 2234 (incf start) 2235 (decf noctets)))) 2236 (values (floor noctets 2) (+ start noctets)))) 2368 2237 :literal-char-code-limit #x10000 2369 2238 :use-byte-order-mark … … 2400 2269 2401 2270 (defun cstring-encoded-length-in-bytes (encoding string start end) 2402 (ash (+ 1 ; NULL terminator 2403 (funcall (character-encoding-units-in-string-function encoding) 2404 string 2405 (or start 0) 2406 (or end (length string)))) 2407 (case (character-encoding-code-unit-size encoding) 2408 (8 0) 2409 (16 1) 2410 (32 2)))) 2271 (+ 1 ; NULL terminator 2272 (funcall (character-encoding-octets-in-string-function encoding) 2273 string 2274 (or start 0) 2275 (or end (length string))))) 2411 2276 2412 2277 (defun encode-string-to-memory (encoding pointer offset string start end)
Note:
See TracChangeset
for help on using the changeset viewer.
