Changeset 5262
- Timestamp:
- Sep 26, 2006, 7:43:23 PM (18 years ago)
- File:
-
- 1 edited
-
trunk/ccl/level-1/l1-unicode.lisp (modified) (15 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/ccl/level-1/l1-unicode.lisp
r5254 r5262 47 47 stream-decode-function ;(1ST-UNIT NEXT-UNIT STREAM) 48 48 49 ;; Returns NIL if the charactercan't be encoded, else sets 1 or49 ;; Returns NIL if the string can't be encoded, else sets 1 or 50 50 ;; more units in a vector argument and returns a value 1 greater 51 51 ;; than the index of the last unit written to the vector 52 vector-encode-function ;(CHAR VECTOR INDEX) 52 vector-encode-function ;(STRING VECTOR INDEX &optional 53 ;(START 0) (END (length string))) 53 54 54 ;; Returns a characterand a value 1 greater than the last unit55 ;; Returns the string and a value 1 greater than the last unit 55 56 ;; index consumed from the vector argument, or NIL and the 56 57 ;; argument index if the character can't be decoded. 57 vector-decode-function ;(VECTOR INDEX )58 vector-decode-function ;(VECTOR INDEX NUNITS STRING) 58 59 59 60 ;; Sets one or more units in memory at the address denoted by 60 61 ;; the pointer and idx arguments and returns (+ idx number of 61 ;; units written to memory), else returns NIL if thecharacter62 ;; units written to memory), else returns NIL if any character 62 63 ;; can't be encoded. 63 memory-encode-function ;(CHAR POINTER INDEX) 64 memory-encode-function ;(STRING POINTER INDEX &optional 65 ; (START 0) (END (length string))) 64 66 65 ;; Returns (as multiple values) the characterencoded in memory67 ;; Returns (as multiple values) the string encoded in memory 66 68 ;; at the address denoted by the address and index args and the 67 69 ;; sum of the index arg and the number of units consumed, else 68 ;; NIL and the incoming index arg if the character can't be70 ;; NIL and the incoming index arg if the characters can't be 69 71 ;; encoded. (Note that the index args are and return value 70 72 ;; are "code unit indices", not "byte offsets".) 71 memory-decode-function ;(POINTER INDEX)73 memory-decode-function ;(POINTER NUNITS INDEX STRING) 72 74 73 75 ;; Returns the number of units needed to encode STRING between START and END. … … 84 86 ;; Does a byte-order-mark determine the endianness of input ? 85 87 ;; Should we prepend a BOM to output ? 86 ;; If non-nil, the value should be a cons:87 ;; (native-byte-order-encoding . swapped-byte-order-encoding)88 ;; If non-nil, the value should be the name of the an encoding 89 ;; that implements this encoding with swapped byte order. 88 90 (use-byte-order-mark nil) 89 91 ) … … 136 138 (nfunction 137 139 iso-8859-1-vector-encode 138 (lambda ( char vector idx)140 (lambda (string vector idx &optional (start 0) (end (length string))) 139 141 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 140 142 (fixnum idx)) 141 (let* ((code (char-code char))) 142 (declare (type (mod #x110000) code)) 143 (when (and (< code 256) 144 (< idx (the fixnum (length vector)))) 145 (setf (aref vector idx) code) 146 (the fixnum (1+ idx)))))) 143 (do* ((i start (1+ i))) 144 ((>= i end) idx) 145 (let* ((char (schar string i)) 146 (code (char-code char))) 147 (declare (type (mod #x110000) code)) 148 (if (>= code 256) 149 (return nil) 150 (progn 151 (setf (aref vector idx) code) 152 (incf idx))))))) 147 153 :vector-decode-function 148 154 (nfunction 149 155 iso-8859-1-vector-decode 150 (lambda (vector idx )156 (lambda (vector idx nunits string) 151 157 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 152 (if (< idx (length vector)) 153 (values (code-char (aref vector idx)) 154 (the fixnum (1+ (the fixnum idx)))) 155 (values nil idx)))) 158 (do* ((i 0 (1+ i)) 159 (len (length vector)) 160 (index idx (1+ index))) 161 ((>= i nunits) (values string index)) 162 (if (>= index len) 163 (return (values nil idx)) 164 (setf (schar string i) (code-char (the (unsigned-byte 8) 165 (aref vector index)))))))) 156 166 :memory-encode-function 157 167 (nfunction 158 168 iso-8859-1-memory-encode 159 (lambda (char pointer idx) 160 (let* ((code (char-code char))) 161 (declare (type (mod #x110000) code)) 162 (when (< code 256) 163 (setf (%get-unsigned-byte pointer idx) code) 164 (1+ idx))))) 169 (lambda (string pointer idx &optional (start 0) (end (length string))) 170 (do* ((i start (1+ i))) 171 ((>= i end) idx) 172 (let* ((code (char-code (schar string i)))) 173 (declare (type (mod #x110000) code)) 174 (if (>= code 256) 175 (return nil) 176 (progn 177 (setf (%get-unsigned-byte pointer idx) code) 178 (incf idx))))))) 165 179 :memory-decode-function 166 180 (nfunction 167 181 iso-8859-1-memory-decode 168 (lambda (pointer idx) 169 (values (code-char (%get-unsigned-byte pointer idx)) 170 (the fixnum (1+ (the fixnum idx)))))) 182 (lambda (pointer nunits idx string) 183 (do* ((i 0 (1+ i)) 184 (index idx (1+ index))) 185 ((>= i nunits) (values string index)) 186 (setf (schar string i) (code-char (the (unsigned-byte 8) 187 (%get-unsigned-byte pointer index))))))) 171 188 :units-in-string-function 172 189 (nfunction … … 299 316 (nfunction 300 317 iso-8859-2-vector-encode 301 (lambda ( char vector idx)318 (lambda (string vector idx &optional (start 0) (end (length string))) 302 319 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 303 320 (fixnum idx)) 304 (let* ((code (char-code char)) 305 (c2 (when (< idx (the fixnum (length vector))) 306 (cond ((< code #xa0) code) 307 ((< code #x180) 308 (svref *unicode-00a0-0180-to-iso8859-2* 309 (the fixnum (- code #xa0)))) 310 ((and (>= code #x2c0) (< code #x2e0)) 311 (svref *unicode-00c0-00e0-to-iso8859-2* 312 (the fixnum (- code #x2c0)))))))) 313 (declare (type (mod #x110000) code)) 314 (when c2 315 (setf (aref vector idx) c2) 316 (the fixnum (1+ idx)))))) 321 (do* ((i start (1+ i))) 322 ((>= i end) idx) 323 (let* ((code (char-code (schar string i))) 324 (c2 (cond ((< code #xa0) code) 325 ((< code #x180) 326 (svref *unicode-00a0-0180-to-iso8859-2* 327 (the fixnum (- code #xa0)))) 328 ((and (>= code #x2c0) (< code #x2e0)) 329 (svref *unicode-00c0-00e0-to-iso8859-2* 330 (the fixnum (- code #x2c0))))))) 331 (declare (type (mod #x110000) code)) 332 (if (null c2) 333 (return nil) 334 (progn 335 (setf (aref vector idx) c2) 336 (incf idx))))))) 317 337 :vector-decode-function 318 338 (nfunction 319 339 iso-8859-2-vector-decode 320 (lambda (vector idx )340 (lambda (vector idx nunits string) 321 341 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 322 (if (< idx (length vector)) 323 (let* ((1st-unit (aref vector idx))) 324 (declare (type (unsigned-byte 8) 1st-unit)) 325 (values 326 (if (< 1st-unit #xa0) 327 (code-char 1st-unit) 328 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0)))) 329 (the fixnum (1+ (the fixnum idx))))) 330 (values nil idx)))) 342 (do* ((i 0 (1+ i)) 343 (len (length vector)) 344 (index idx (1+ index))) 345 ((>= i nunits) (values string index)) 346 (if (>= index len) 347 (return (values nil idx)) 348 (let* ((1st-unit (aref vector index))) 349 (declare (type (unsigned-byte 8) 1st-unit)) 350 (setf (schar string i) 351 (if (< 1st-unit #xa0) 352 (code-char 1st-unit) 353 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0)))) 354 )))))) 331 355 :memory-encode-function 332 356 (nfunction 333 357 iso-8859-2-memory-encode 334 (lambda (char pointer idx) 335 (let* ((code (char-code char)) 336 (c2 (cond ((< code #xa0) code) 358 (lambda (string pointer idx &optional (start 0) (end (length string))) 359 (do* ((i start (1+ i))) 360 ((>= i end) idx) 361 (let* ((code (char-code (schar string i))) 362 (c2 (cond ((< code #xa0) code) 337 363 ((< code #x180) 338 364 (svref *unicode-00a0-0180-to-iso8859-2* … … 342 368 (the fixnum (- code #x2c0))))))) 343 369 (declare (type (mod #x110000) code)) 344 (when c2 345 (setf (%get-unsigned-byte pointer idx) c2) 346 (1+ idx))))) 370 (if (null c2) 371 (return nil) 372 (progn 373 (setf (%get-unsigned-byte pointer idx) c2) 374 (1+ idx))))))) 347 375 :memory-decode-function 348 376 (nfunction 349 377 iso-8859-2-memory-decode 350 (lambda (pointer idx) 351 (let* ((1st-unit (%get-unsigned-byte pointer idx))) 352 (declare (type (unsigned-byte 8) 1st-unit)) 353 (values (if (< 1st-unit #xa0) 378 (lambda (pointer nunits idx string) 379 (do* ((i 0 (1+ i)) 380 (index idx (1+ index))) 381 ((>= i nunits) (values string index)) 382 (let* ((1st-unit (%get-unsigned-byte pointer index))) 383 (declare (type (unsigned-byte 8) 1st-unit)) 384 (setf (schar string i) 385 (if (< 1st-unit #xa0) 354 386 (code-char 1st-unit) 355 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0)))) 356 (the fixnum (1+ (the fixnum idx))))))) 387 (svref *iso-8859-2-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 357 388 :units-in-string-function 358 389 (nfunction … … 484 515 (nfunction 485 516 iso-8859-3-vector-encode 486 (lambda ( char vector idx)517 (lambda (string vector idx &optional (start 0) (end (length string))) 487 518 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 488 519 (fixnum idx)) 489 (let* ((code (char-code char)) 490 (c2 (when (< idx (the fixnum (length vector))) 491 (cond ((< code #xa0) code) 492 ((< code #x100) 493 (svref *unicode-a0-100-to-iso8859-3* 494 (the fixnum (- code #xa0)))) 495 ((and (>= code #x108) (< code #x180)) 496 (svref *unicode-108-180-to-iso8859-3* 497 (the fixnum (- code #x108)))) 498 ((and (>= code #x2d8) (< code #x2e0)) 499 (svref *unicode-2d8-2e0-to-iso8859-3* 500 (the fixnum (- code #x2d8)))))))) 501 (declare (type (mod #x110000) code)) 502 (when c2 503 (setf (aref vector idx) c2) 504 (the fixnum (1+ idx)))))) 520 (do* ((i start (1+ i))) 521 ((>= i end) idx) 522 (let* ((char (schar string i)) 523 (code (char-code char)) 524 (c2 (cond ((< code #xa0) code) 525 ((< code #x100) 526 (svref *unicode-a0-100-to-iso8859-3* 527 (the fixnum (- code #xa0)))) 528 ((and (>= code #x108) (< code #x180)) 529 (svref *unicode-108-180-to-iso8859-3* 530 (the fixnum (- code #x108)))) 531 ((and (>= code #x2d8) (< code #x2e0)) 532 (svref *unicode-2d8-2e0-to-iso8859-3* 533 (the fixnum (- code #x2d8))))))) 534 (declare (type (mod #x110000) code)) 535 (if (null c2) 536 (return nil) 537 (progn 538 (setf (aref vector idx) c2) 539 (incf idx))))))) 505 540 :vector-decode-function 506 541 (nfunction 507 542 iso-8859-3-vector-decode 508 (lambda (vector idx )543 (lambda (vector idx nunits string) 509 544 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 510 (if (< idx (length vector)) 511 (let* ((1st-unit (aref vector idx))) 545 (do* ((i 0 (1+ i)) 546 (len (length vector)) 547 (index idx (1+ index))) 548 ((>= i nunits) (values string index)) 549 (if (>= index len) 550 (return (values nil idx)) 551 (let* ((1st-unit (aref vector index))) 552 (declare (type (unsigned-byte 8) 1st-unit)) 553 (setf (schar string i) 554 (if (< 1st-unit #xa0) 555 (code-char 1st-unit) 556 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0)))))))))) 557 :memory-encode-function 558 (nfunction 559 iso-8859-3-memory-encode 560 (lambda (string pointer idx &optional (start 0) (end (length string))) 561 (do* ((i start (1+ i))) 562 ((>= i end) idx) 563 (let* ((code (char-code (schar string i))) 564 (c2 (cond ((< code #xa0) code) 565 ((< code #x100) 566 (svref *unicode-a0-100-to-iso8859-3* 567 (the fixnum (- code #xa0)))) 568 ((and (>= code #x108) (< code #x180)) 569 (svref *unicode-108-180-to-iso8859-3* 570 (the fixnum (- code #x108)))) 571 ((and (>= code #x2d8) (< code #x2e0)) 572 (svref *unicode-2d8-2e0-to-iso8859-3* 573 (the fixnum (- code #x2d8))))))) 574 (declare (type (mod #x110000) code)) 575 (if (null c2) 576 (return nil) 577 (progn 578 (setf (%get-unsigned-byte pointer idx) c2) 579 (incf idx))))))) 580 :memory-decode-function 581 (nfunction 582 iso-8859-3-memory-decode 583 (lambda (pointer nunits idx string) 584 (do* ((i 0 (1+ i)) 585 (index idx (1+ index))) 586 ((>= i nunits) (values string index)) 587 (let* ((1st-unit (%get-unsigned-byte pointer index))) 512 588 (declare (type (unsigned-byte 8) 1st-unit)) 513 (values 514 (if (< 1st-unit #xa0) 515 (code-char 1st-unit) 516 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0)))) 517 (the fixnum (1+ (the fixnum idx))))) 518 (values nil idx)))) 519 :memory-encode-function 520 (nfunction 521 iso-8859-3-memory-encode 522 (lambda (char pointer idx) 523 (let* ((code (char-code char)) 524 (c2 (cond ((< code #xa0) code) 525 ((< code #x100) 526 (svref *unicode-a0-100-to-iso8859-3* 527 (the fixnum (- code #xa0)))) 528 ((and (>= code #x108) (< code #x180)) 529 (svref *unicode-108-180-to-iso8859-3* 530 (the fixnum (- code #x108)))) 531 ((and (>= code #x2d8) (< code #x2e0)) 532 (svref *unicode-2d8-2e0-to-iso8859-3* 533 (the fixnum (- code #x2d8))))))) 534 (declare (type (mod #x110000) code)) 535 (when c2 536 (setf (%get-unsigned-byte pointer idx) c2) 537 (1+ idx))))) 538 :memory-decode-function 539 (nfunction 540 iso-8859-3-memory-decode 541 (lambda (pointer idx) 542 (let* ((1st-unit (%get-unsigned-byte pointer idx))) 543 (declare (type (unsigned-byte 8) 1st-unit)) 544 (values (if (< 1st-unit #xa0) 589 (setf (schar string i) 590 (if (< 1st-unit #xa0) 545 591 (code-char 1st-unit) 546 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0)))) 547 (the fixnum (1+ (the fixnum idx))))))) 592 (svref *iso-8859-3-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 548 593 :units-in-string-function 549 594 (nfunction 550 iso-8859- 1-units-in-string595 iso-8859-3-units-in-string 551 596 (lambda (string &optional (start 0) (end (length string))) 552 597 (when (>= end start) … … 677 722 (nfunction 678 723 iso-8859-4-vector-encode 679 (lambda ( char vector idx)724 (lambda (string vector idx &optional (start 0) (end (length string))) 680 725 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 681 726 (fixnum idx)) 682 (let* ((code (char-code char)) 683 (c2 (when (< idx (the fixnum (length vector))) 684 (cond ((< code #xa0) code) 685 ((< code #x180) 686 (svref *unicode-a0-180-to-iso8859-4* 687 (the fixnum (- code #xa0)))) 688 ((and (>= code #x2d8) (< code #x2e0)) 689 (svref *unicode-2c0-2e0-to-iso8859-4* 690 (the fixnum (- code #x2c0)))))))) 691 (declare (type (mod #x110000) code)) 692 (when c2 693 (setf (aref vector idx) c2) 694 (the fixnum (1+ idx)))))) 727 (do* ((i start (1+ i))) 728 ((>= i end) idx) 729 (let* ((char (schar string i)) 730 (code (char-code char)) 731 (c2 (cond ((< code #xa0) code) 732 ((< code #x180) 733 (svref *unicode-a0-180-to-iso8859-4* 734 (the fixnum (- code #xa0)))) 735 ((and (>= code #x2d8) (< code #x2e0)) 736 (svref *unicode-2c0-2e0-to-iso8859-4* 737 (the fixnum (- code #x2c0))))))) 738 (declare (type (mod #x110000) code)) 739 (if (null c2) 740 (return nil) 741 (progn 742 (setf (aref vector idx) c2) 743 (incf idx))))))) 695 744 :vector-decode-function 696 745 (nfunction 697 746 iso-8859-4-vector-decode 698 (lambda (vector idx )747 (lambda (vector idx nunits string) 699 748 (declare (type (simple-array (unsigned-byte 8) (*)) vector)) 700 (if (< idx (length vector)) 701 (let* ((1st-unit (aref vector idx))) 749 (do* ((i 0 (1+ i)) 750 (len (length vector)) 751 (index idx (1+ index))) 752 ((>= i nunits) (values string index)) 753 (if (>= index len) 754 (return (values nil idx)) 755 (let* ((1st-unit (aref vector index))) 756 (declare (type (unsigned-byte 8) 1st-unit)) 757 (setf (schar string i) 758 (if (< 1st-unit #xa0) 759 (code-char 1st-unit) 760 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0)))))))))) 761 :memory-encode-function 762 (nfunction 763 iso-8859-4-memory-encode 764 (lambda (string pointer idx &optional (start 0) (end (length string))) 765 (do* ((i start (1+ i))) 766 ((>= i end) idx) 767 (let* ((code (char-code (schar string i))) 768 (c2 (cond ((< code #xa0) code) 769 ((< code #x180) 770 (svref *unicode-a0-180-to-iso8859-4* 771 (the fixnum (- code #xa0)))) 772 ((and (>= code #x2d8) (< code #x2e0)) 773 (svref *unicode-2c0-2e0-to-iso8859-4* 774 (the fixnum (- code #x2c0))))))) 775 (declare (type (mod #x110000) code)) 776 (if (null c2) 777 (return nil) 778 (progn 779 (setf (%get-unsigned-byte pointer idx) c2) 780 (incf idx))))))) 781 :memory-decode-function 782 (nfunction 783 iso-8859-4-memory-decode 784 (lambda (pointer nunits idx string) 785 (do* ((i 0 (1+ i)) 786 (index idx (1+ index))) 787 ((>= i nunits) (values string index)) 788 (let* ((1st-unit (%get-unsigned-byte pointer index))) 702 789 (declare (type (unsigned-byte 8) 1st-unit)) 703 (values 704 (if (< 1st-unit #xa0) 705 (code-char 1st-unit) 706 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0)))) 707 (the fixnum (1+ (the fixnum idx))))) 708 (values nil idx)))) 709 :memory-encode-function 710 (nfunction 711 iso-8859-4-memory-encode 712 (lambda (char pointer idx) 713 (let* ((code (char-code char)) 714 (c2 (cond ((< code #xa0) code) 715 ((< code #x180) 716 (svref *unicode-a0-180-to-iso8859-4* 717 (the fixnum (- code #xa0)))) 718 ((and (>= code #x2d8) (< code #x2e0)) 719 (svref *unicode-2c0-2e0-to-iso8859-4* 720 (the fixnum (- code #x2c0))))))) 721 (declare (type (mod #x110000) code)) 722 (when c2 723 (setf (%get-unsigned-byte pointer idx) c2) 724 (1+ idx))))) 725 :memory-decode-function 726 (nfunction 727 iso-8859-4-memory-decode 728 (lambda (pointer idx) 729 (let* ((1st-unit (%get-unsigned-byte pointer idx))) 730 (declare (type (unsigned-byte 8) 1st-unit)) 731 (values (if (< 1st-unit #xa0) 790 (setf (schar string i) 791 (if (< 1st-unit #xa0) 732 792 (code-char 1st-unit) 733 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0)))) 734 (the fixnum (1+ (the fixnum idx))))))) 793 (svref *iso-8859-4-to-unicode* (the fixnum (- 1st-unit #xa0))))))))) 735 794 :units-in-string-function 736 795 (nfunction … … 871 930 (nfunction 872 931 utf-8-vector-encode 873 (lambda ( char vector index)932 (lambda (string vector idx &optional (start 0) (end (length string))) 874 933 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 875 (type index index) 876 (optimize (speed 3) (safety 0))) 877 (let* ((len (length vector)) 878 (code (char-code char))) 879 (declare (type index len) 880 (type (mod #x110000) code)) 881 (if (< code #x80) 882 (when (< index len) 883 (setf (aref vector index) code) 884 (the fixnum (+ index 1))) 885 (let* ((i1 (1+ index))) 886 (declare (fixnum i1)) 887 (if (< code #x800) 888 (when (< i1 len) 889 (setf (aref vector index) 890 (logior #xc0 (the fixnum (ash code -6))) 891 (aref vector i1) 892 (logior #x80 (the fixnum (logand code #x3f)))) 893 (the fixnum (+ i1 1))) 894 (let* ((i2 (1+ i1))) 895 (declare (fixnum i2)) 896 (if (< code #x10000) 897 (when (< i2 len) 898 (setf (aref vector index) 899 (logior #xe0 (the fixnum (ash code -12))) 900 (aref vector i1) 901 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6))))) 902 (aref vector i2) 903 (logior #x80 (the fixnum (logand code #x3f)))) 904 (the fixnum (+ i2 1))) 905 (let* ((i3 (1+ i2))) 906 (declare (fixnum i3)) 907 (when (< i3 len) 908 (setf (aref vector index) 909 (logior #xf0 910 (the fixnum (logand #x7 (the fixnum (ash code -18))))) 911 (aref vector i1) 912 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -12))))) 913 (aref vector i2) 914 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6))))) 915 (aref vector i3) 916 (logand #x3f code)) 917 (the fixnum (+ i3 1)))))))))))) 934 (fixnum idx)) 935 (do* ((i start (1+ i))) 936 ((>= i end) idx) 937 (let* ((char (schar string i)) 938 (code (char-code char))) 939 (declare (type (mod #x110000) code)) 940 (cond ((< code #x80) 941 (setf (aref vector idx) code) 942 (incf idx)) 943 ((< code #x800) 944 (setf (aref vector idx) 945 (logior #xc0 (the fixnum (ash code -6)))) 946 (incf idx) 947 (setf (aref vector idx) 948 (logior #x80 (the fixnum (logand code #x3f)))) 949 (incf idx)) 950 ((< code #x10000) 951 (setf (aref vector idx) 952 (logior #xe0 (the fixnum (ash code -12)))) 953 (incf idx) 954 (setf (aref vector idx) 955 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 956 (incf idx) 957 (setf (aref vector idx) 958 (logior #x80 (the fixnum (logand code #x3f)))) 959 (incf idx)) 960 (t 961 (setf (aref vector idx) 962 (logior #xf0 963 (the fixnum (logand #x7 (the fixnum (ash code -18)))))) 964 (incf idx) 965 (setf (aref vector idx) 966 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -12)))))) 967 (incf idx) 968 (setf (aref vector idx) 969 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 970 (incf idx) 971 (setf (aref vector idx) (logand #x3f code)) 972 (incf idx))))))) 918 973 :vector-decode-function 919 974 (nfunction 920 975 utf-8-vector-decode 921 (lambda (vector idx )976 (lambda (vector idx nunits string) 922 977 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 923 978 (type index idx)) 924 (let* ((len (length vector))) 925 (declare (fixnum len)) 926 (if (>= idx len) 979 (do* ((i 0 (1+ i)) 980 (len (length vector)) 981 (index idx (1+ index))) 982 ((>= i nunits) (values string index)) 983 (if (>= index len) 927 984 (values nil idx) 928 (let* ((1st-unit (aref vector i dx)))985 (let* ((1st-unit (aref vector index))) 929 986 (declare (type (unsigned-byte 8) 1st-unit)) 930 (if (< 1st-unit #x80) 931 (values (code-char 1st-unit) (the fixnum (1+ idx))) 932 (if (>= 1st-unit #xc2) 933 (let* ((i1 (1+ idx))) 934 (declare (fixnum i1)) 935 (if (>= i1 len) 936 (values nil idx) 937 (let* ((s1 (aref vector i1))) 938 (declare (type (unsigned-byte 8) s1)) 939 (if (< 1st-unit #xe0) 940 (if (< (the fixnum (logxor s1 #x80)) #x40) 941 (values 942 (code-char 943 (logior 944 (the fixnum (ash (the fixnum (logand #x1f 1st-unit)) 6)) 945 (the fixnum (logxor s1 #x80)))) 946 (the fixnum (1+ i1))) 947 (values nil i1)) 948 (let* ((i2 (1+ i1))) 949 (declare (fixnum i2)) 950 (if (>= i2 len) 951 (values nil idx) 952 (let* ((s2 (aref vector i2))) 953 (declare (type (unsigned-byte 8) s2)) 987 (let* ((char 988 (if (< 1st-unit #x80) 989 (code-char 1st-unit) 990 (if (>= 1st-unit #xc2) 991 (let* ((2nd-unit (aref vector (incf index)))) 992 (declare (type (unsigned-byte 8) 2nd-unit)) 993 (if (< 1st-unit #xe0) 994 (if (< (the fixnum (logxor 2nd-unit #x80)) #x40) 995 (code-char 996 (logior 997 (the fixnum (ash (the fixnum (logand #x1f 1st-unit)) 6)) 998 (the fixnum (logxor 2nd-unit #x80))))) 999 (let* ((3rd-unit (aref vector (incf index)))) 1000 (declare (type (unsigned-byte 8) 3rd-unit)) 954 1001 (if (< 1st-unit #xf0) 955 (if (and (< (the fixnum (logxor s1#x80)) #x40)956 (< (the fixnum (logxor s2#x80)) #x40)1002 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1003 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 957 1004 (or (>= 1st-unit #xe1) 958 (>= s1 #xa0))) 959 (values 960 (code-char (the fixnum 961 (logior (the fixnum 962 (ash (the fixnum (logand 1st-unit #xf)) 963 12)) 964 (the fixnum 965 (logior 966 (the fixnum 967 (ash (the fixnum (logand s1 #x3f)) 968 6)) 969 (the fixnum (logand s2 #x3f))))))) 970 (the fixnum (1+ i2))) 971 (values nil idx)) 972 (if (>= 1st-unit #xf8) 973 (values nil idx) 974 (let* ((i3 (1+ i2))) 975 (declare (fixnum i3)) 976 (if (>= i3 len) 977 (values nil idx) 978 (let* ((s3 (aref vector i3))) 979 (declare (type (unsigned-byte 8) s3)) 980 (if (and (< (the fixnum (logxor s1 #x80)) #x40) 981 (< (the fixnum (logxor s2 #x80)) #x40) 982 (< (the fixnum (logxor s3 #x80)) #x40) 983 (or (>= 1st-unit #xf1) 984 (>= s1 #x90))) 985 (values 986 (code-char 987 (logior 988 (the fixnum 989 (logior 990 (the fixnum 991 (ash (the fixnum (logand 1st-unit 7)) 18)) 992 (the fixnum 993 (ash (the fixnum (logxor s1 #x80)) 12)))) 994 (the fixnum 995 (logior 996 (the fixnum 997 (ash (the fixnum (logxor s2 #x80)) 6)) 998 (the fixnum (logxor s3 #x80)))))) 999 (the fixnum (1+ i3))) 1000 (values nil idx)))))))))))))) 1001 (values nil idx)))))))) 1005 (>= 2nd-unit #xa0))) 1006 (code-char (the fixnum 1007 (logior (the fixnum 1008 (ash (the fixnum (logand 1st-unit #xf)) 1009 12)) 1010 (the fixnum 1011 (logior 1012 (the fixnum 1013 (ash (the fixnum (logand 2nd-unit #x3f)) 1014 6)) 1015 (the fixnum (logand 3rd-unit #x3f)))))))) 1016 (let* ((4th-unit (aref vector (incf index)))) 1017 (declare (type (unsigned-byte 8) 4th-unit)) 1018 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1019 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 1020 (< (the fixnum (logxor 4th-unit #x80)) #x40) 1021 (or (>= 1st-unit #xf1) 1022 (>= 2nd-unit #x90))) 1023 (code-char 1024 (logior 1025 (the fixnum 1026 (logior 1027 (the fixnum 1028 (ash (the fixnum (logand 1st-unit 7)) 18)) 1029 (the fixnum 1030 (ash (the fixnum (logxor 2nd-unit #x80)) 12)))) 1031 (the fixnum 1032 (logior 1033 (the fixnum 1034 (ash (the fixnum (logxor 3rd-unit #x80)) 6)) 1035 (the fixnum (logxor 4th-unit #x80)))))))))))))))) 1036 (if char 1037 (setf (schar string i) char) 1038 (return (values nil idx))))))))) 1002 1039 :memory-encode-function 1003 1040 (nfunction 1004 1041 utf-8-memory-encode 1005 (lambda ( char pointer idx)1042 (lambda (string pointer idx &optional (start 0) (end (length string))) 1006 1043 (declare (fixnum idx)) 1007 (let* ((code (char-code char)) 1008 (i1 (1+ idx)) 1009 (i2 (1+ i1)) 1010 (i3 (1+ i2))) 1011 (declare (type (mod #x110000) code) 1012 (fixnum i1 i2 i3)) 1013 (cond ((< code #x80) 1014 (setf (%get-unsigned-byte pointer idx) code) 1015 i1) 1016 ((< code #x800) 1017 (setf (%get-unsigned-byte pointer idx) 1018 (logior #xc0 (the fixnum (ash code -6))) 1019 (%get-unsigned-byte pointer i1) 1020 (logior #x80 (the fixnum (logand code #x3f)))) 1021 i2) 1022 ((< code #x10000) 1023 (setf (%get-unsigned-byte pointer idx) 1024 (logior #xe0 (the fixnum (ash code -12))) 1025 (%get-unsigned-byte pointer i1) 1026 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6))))) 1027 (%get-unsigned-byte pointer i2) 1028 (logior #x80 (the fixnum (logand code #x3f)))) 1029 i3) 1030 (t 1031 (setf (%get-unsigned-byte pointer idx) 1032 (logior #xf0 1033 (the fixnum (logand #x7 (the fixnum (ash code -18))))) 1034 (%get-unsigned-byte pointer i1) 1035 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -12))))) 1036 (%get-unsigned-byte pointer i2) 1037 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6))))) 1038 (%get-unsigned-byte pointer i3) 1039 (logand #x3f code)) 1040 (the fixnum (1+ i3))))))) 1044 (do* ((i start (1+ i))) 1045 ((>= i end) idx) 1046 (let* ((code (char-code (schar string i)))) 1047 (declare (type (mod #x110000) code)) 1048 (cond ((< code #x80) 1049 (setf (%get-unsigned-byte pointer idx) code) 1050 (incf idx)) 1051 ((< code #x800) 1052 (setf (%get-unsigned-byte pointer idx) 1053 (logior #xc0 (the fixnum (ash code -6)))) 1054 (incf idx) 1055 (setf (%get-unsigned-byte pointer idx) 1056 (logior #x80 (the fixnum (logand code #x3f)))) 1057 (incf idx)) 1058 ((< code #x10000) 1059 (setf (%get-unsigned-byte pointer idx) 1060 (logior #xe0 (the fixnum (ash code -12)))) 1061 (incf idx) 1062 (setf (%get-unsigned-byte pointer idx) 1063 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 1064 (incf idx) 1065 (setf (%get-unsigned-byte pointer idx) 1066 (logior #x80 (the fixnum (logand code #x3f)))) 1067 (incf idx)) 1068 (t 1069 (setf (%get-unsigned-byte pointer idx) 1070 (logior #xf0 1071 (the fixnum (logand #x7 (the fixnum (ash code -18)))))) 1072 (incf idx) 1073 (setf (%get-unsigned-byte pointer idx) 1074 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -12)))))) 1075 (incf idx) 1076 (setf (%get-unsigned-byte pointer idx) 1077 (logior #x80 (the fixnum (logand #x3f (the fixnum (ash code -6)))))) 1078 (incf idx) 1079 (setf (%get-unsigned-byte pointer idx) 1080 (logand #x3f code)) 1081 (incf idx))))))) 1041 1082 :memory-decode-function 1042 1083 (nfunction 1043 1084 utf-8-memory-decode 1044 (lambda (pointer idx) 1045 (declare (fixnum idx)) 1046 (let* ((1st-unit (%get-unsigned-byte pointer idx)) 1047 (i1 (1+ idx)) 1048 (i2 (1+ i1)) 1049 (i3 (1+ i2))) 1050 (declare (type (unsigned-byte 8) 1st-unit) 1051 (fixnum i1 i2 i3)) 1052 (if (< 1st-unit #x80) 1053 (values (code-char 1st-unit) (the fixnum (1+ idx))) 1054 (if (< 1st-unit #xc2) 1055 (values nil idx) 1056 (let* ((s1 (%get-unsigned-byte pointer i1))) 1057 (declare (type (unsigned-byte 8) s1)) 1058 (if (< 1st-unit #xe0) 1059 (if (< (the fixnum (logxor s1 #x80)) #x40) 1060 (values 1061 (code-char 1062 (logior 1063 (the fixnum (ash (the fixnum (logand #x1f 1st-unit)) 6)) 1064 (the fixnum (logxor s1 #x80)))) 1065 (the fixnum (1+ i1))) 1066 (values nil i1)) 1067 (let* ((s2 (%get-unsigned-byte pointer i2))) 1068 (declare (type (unsigned-byte 8) s2)) 1069 (if (< 1st-unit #xf0) 1070 (if (and (< (the fixnum (logxor s1 #x80)) #x40) 1071 (< (the fixnum (logxor s2 #x80)) #x40) 1072 (or (>= 1st-unit #xe1) 1073 (>= s1 #xa0))) 1074 (values 1075 (code-char (the fixnum 1076 (logior (the fixnum 1077 (ash (the fixnum (logand 1st-unit #xf)) 1078 12)) 1079 (the fixnum 1080 (logior 1081 (the fixnum 1082 (ash (the fixnum (logand s1 #x3f)) 1083 6)) 1084 (the fixnum (logand s2 #x3f))))))) 1085 i3) 1086 (values nil idx)) 1087 (if (>= 1st-unit #xf8) 1088 (values nil idx) 1089 (let* ((s3 (%get-unsigned-byte pointer i3))) 1090 (declare (type (unsigned-byte 8) s3)) 1091 (if (and (< (the fixnum (logxor s1 #x80)) #x40) 1092 (< (the fixnum (logxor s2 #x80)) #x40) 1093 (< (the fixnum (logxor s3 #x80)) #x40) 1094 (or (>= 1st-unit #xf1) 1095 (>= s1 #x90))) 1096 (values 1097 (code-char 1098 (logior 1099 (the fixnum 1100 (logior 1101 (the fixnum 1102 (ash (the fixnum (logand 1st-unit 7)) 18)) 1103 (the fixnum 1104 (ash (the fixnum (logxor s1 #x80)) 12)))) 1105 (the fixnum 1106 (logior 1107 (the fixnum 1108 (ash (the fixnum (logxor s2 #x80)) 6)) 1109 (the fixnum (logxor s3 #x80)))))) 1110 (the fixnum (1+ i3))) 1111 (values nil idx))))))))))))) 1085 (lambda (pointer nunits idx string) 1086 (declare (fixnum nunits idx)) 1087 (do* ((i 0 (1+ i)) 1088 (index idx (1+ index))) 1089 ((>= i nunits) (values string index)) 1090 (let* ((1st-unit (%get-unsigned-byte pointer index))) 1091 (declare (type (unsigned-byte 8) 1st-unit)) 1092 (let* ((char (if (< 1st-unit #x80) 1093 (code-char 1st-unit) 1094 (if (>= 1st-unit #xc2) 1095 (let* ((2nd-unit (%get-unsigned-byte pointer (incf index)))) 1096 (declare (type (unsigned-byte 8) 2nd-unit)) 1097 (if (< 1st-unit #xe0) 1098 (if (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1099 (code-char 1100 (logior 1101 (the fixnum (ash (the fixnum (logand #x1f 1st-unit)) 6)) 1102 (the fixnum (logxor 2nd-unit #x80))))) 1103 (let* ((3rd-unit (%get-unsigned-byte pointer (incf index)))) 1104 (declare (type (unsigned-byte 8) 3rd-unit)) 1105 (if (< 1st-unit #xf0) 1106 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1107 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 1108 (or (>= 1st-unit #xe1) 1109 (>= 2nd-unit #xa0))) 1110 (code-char (the fixnum 1111 (logior (the fixnum 1112 (ash (the fixnum (logand 1st-unit #xf)) 1113 12)) 1114 (the fixnum 1115 (logior 1116 (the fixnum 1117 (ash (the fixnum (logand 2nd-unit #x3f)) 1118 6)) 1119 (the fixnum (logand 3rd-unit #x3f)))))))) 1120 (if (< 1st-unit #xf8) 1121 (let* ((4th-unit (%get-unsigned-byte pointer (incf index)))) 1122 (declare (type (unsigned-byte 8) 4th-unit)) 1123 (if (and (< (the fixnum (logxor 2nd-unit #x80)) #x40) 1124 (< (the fixnum (logxor 3rd-unit #x80)) #x40) 1125 (< (the fixnum (logxor 4th-unit #x80)) #x40) 1126 (or (>= 1st-unit #xf1) 1127 (>= 2nd-unit #x90))) 1128 (code-char 1129 (logior 1130 (the fixnum 1131 (logior 1132 (the fixnum 1133 (ash (the fixnum (logand 1st-unit 7)) 18)) 1134 (the fixnum 1135 (ash (the fixnum (logxor 2nd-unit #x80)) 12)))) 1136 (the fixnum 1137 (logior 1138 (the fixnum 1139 (ash (the fixnum (logxor 3rd-unit #x80)) 6)) 1140 (the fixnum (logxor 4th-unit #x80))))))))))))))))) 1141 (if char 1142 (setf (schar string i) char) 1143 (return (values nil idx)))))))) 1112 1144 :units-in-string-function 1113 1145 (nfunction … … 1230 1262 (nfunction 1231 1263 native-utf-16-vector-encode 1232 (lambda ( char vector index)1264 (lambda (string vector idx &optional (start 0) (end (length string))) 1233 1265 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1234 (type index index) 1235 (optimize (speed 3) (safety 0))) 1236 (let* ((len (length vector)) 1237 (code (char-code char)) 1238 (highbits (- code #x10000))) 1239 (declare (type index len) 1240 (type (mod #x110000) code) 1241 (fixnum highbits)) 1242 (if (< highbits 0) 1243 (when (< index len) 1244 (setf (aref vector index) code) 1245 (the fixnum (+ index 1))) 1246 (let* ((i1 (1+ index))) 1247 (declare (fixnum i1)) 1248 (when (< i1 len) 1249 (setf (aref vector index) (logior #xd800 (the fixnum (ash highbits -10))) 1250 (aref vector i1) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1251 (the fixnum (1+ i1)))))))) 1266 (fixnum idx)) 1267 (do* ((i start (1+ i))) 1268 ((>= i end) idx) 1269 (let* ((char (schar string i)) 1270 (code (char-code char)) 1271 (highbits (- code #x10000))) 1272 (declare (type (mod #x110000) code) 1273 (fixnum highbits)) 1274 (cond ((< highbits 0) 1275 (setf (aref vector idx) code) 1276 (incf idx)) 1277 (t 1278 (setf (aref vector idx) (logior #xd800 (the fixnum (ash highbits -10)))) 1279 (incf idx) 1280 (setf (aref vector ) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1281 (incf idx))))))) 1252 1282 :vector-decode-function 1253 1283 (nfunction 1254 1284 native-utf-16-vector-decode 1255 (lambda (vector idx )1285 (lambda (vector idx nunits string) 1256 1286 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1257 1287 (type index idx)) 1258 (let* ((len (length vector))) 1259 (declare (fixnum len)) 1260 (if (>= idx len) 1288 (do* ((i 0 (1+ i)) 1289 (len (length vector)) 1290 (index idx (1+ index))) 1291 ((>= i nunits) (values string index)) 1292 (declare (fixnum i len index)) 1293 (if (>= index len) 1261 1294 (values nil idx) 1262 (let* ((1st-unit (aref vector i dx)))1295 (let* ((1st-unit (aref vector index))) 1263 1296 (declare (type (unsigned-byte 16) 1st-unit)) 1264 (if (or (< 1st-unit #xd800) 1265 (>= 1st-unit #xe000)) 1266 (values (code-char 1st-unit) 1267 (the fixnum (1+ idx))) 1268 (if (>= 1st-unit #xdc00) 1269 (values nil idx) 1270 (let* ((i1 (1+ idx))) 1271 (declare (fixnum i1)) 1272 (if (>= i1 len) 1273 (values nil idx) 1274 (let* ((2nd-unit (aref vector i1))) 1275 (declare (type (unsigned-byte 16) 2nd-unit)) 1276 (if (and (>= 2nd-unit #xdc00) 1277 (< 2nd-unit #xe000)) 1278 (values 1279 (code-char (the (unsigned-byte 21) 1280 (logior 1281 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1282 (- 1st-unit #xd800)) 1283 10)) 1284 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))) 1285 (the fixnum (1+ i1))) 1286 (values nil idx)))))))))))) 1297 (let* ((char 1298 (if (or (< 1st-unit #xd800) 1299 (>= 1st-unit #xe000)) 1300 (code-char 1st-unit) 1301 (if (< 1st-unit #xdc00) 1302 (let* ((2nd-unit (aref vector (incf index)))) 1303 (declare (type (unsigned-byte 16) 2nd-unit)) 1304 (if (and (>= 2nd-unit #xdc00) 1305 (< 2nd-unit #xe000)) 1306 (code-char (the (unsigned-byte 21) 1307 (logior 1308 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1309 (- 1st-unit #xd800)) 1310 10)) 1311 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1312 (if char 1313 (setf (schar string i) char) 1314 (return (values nil idx))))))))) 1287 1315 :memory-encode-function 1288 1316 (nfunction 1289 1317 native-utf-16-memory-encode 1290 (lambda ( char pointer idx)1318 (lambda (string pointer idx &optional (start 0) (end (length string))) 1291 1319 (declare (fixnum idx)) 1292 (let* ((code (char-code char)) 1293 (highbits (- code #x10000)) 1294 (i0 (+ idx idx)) 1295 (i1 (+ i0 2))) 1296 (declare (type (mod #x110000) code) 1297 (fixnum i0 i1 highbits)) 1320 (do* ((i start (1+ i))) 1321 ((>= i end) idx) 1322 (let* ((code (char-code (schar string i))) 1323 (highbits (- code #x10000)) 1324 (p (+ idx idx))) 1325 (declare (type (mod #x110000) code) 1326 (fixnum p highbits)) 1298 1327 (cond ((< highbits 0) 1299 (setf (%get-unsigned-word pointer i0) code) 1300 (the fixnum (1+ idx))) 1328 (setf (%get-unsigned-word pointer p) code) 1329 (incf idx) 1330 (incf p 2)) 1301 1331 1302 1332 (t 1303 (setf (%get-unsigned-word pointer i0) (logior #xd800 (the fixnum (ash highbits -10))) 1304 (%get-unsigned-word pointer i1) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1305 1306 (the fixnum (+ idx 2))))))) 1333 (setf (%get-unsigned-word pointer p) (logior #xd800 (the fixnum (ash highbits -10)))) 1334 (incf idx) 1335 (incf p 2) 1336 (setf (%get-unsigned-word pointer p) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1337 (incf idx) 1338 (incf p 2))))))) 1307 1339 :memory-decode-function 1308 1340 (nfunction 1309 1341 native-utf-16-memory-decode 1310 (lambda (pointer idx) 1311 (declare (fixnum idx)) 1312 (let* ((i0 (+ idx idx)) 1313 (1st-unit (%get-unsigned-word pointer i0)) 1314 (i1 (+ i0 2))) 1315 (declare (type (unsigned-byte 16) 1st-unit) 1316 (fixnum i1 i2 i3)) 1317 (if (or (< 1st-unit #xd800) 1318 (>= 1st-unit #xe000)) 1319 (values (code-char 1st-unit) (the fixnum (1+ idx))) 1320 (if (< 1st-unit #xdc00) 1321 (let* ((2nd-unit (%get-unsigned-word pointer i1))) 1322 (declare (type (unsigned-byte 16) 2nd-unit)) 1323 (if (and (>= 2nd-unit #xdc00) 1324 (< 2nd-unit #xe000)) 1325 (values 1326 (code-char (the (unsigned-byte 21) 1327 (logior 1328 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1329 (- 1st-unit #xd800)) 1330 10)) 1331 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))) 1332 (the fixnum (+ idx 2)))))))))) 1342 (lambda (pointer nunits idx string) 1343 (declare (fixnum nunits idx)) 1344 (do* ((i 0 (1+ i)) 1345 (index idx (1+ index)) 1346 (p (+ index index) (+ p 2))) 1347 ((>= i nunits) (values string index)) 1348 (declare (fixnum i index p)) 1349 (let* ((1st-unit (%get-unsigned-word pointer p))) 1350 (declare (type (unsigned-byte 16) 1st-unit)) 1351 (let* ((char 1352 (if (or (< 1st-unit #xd800) 1353 (>= 1st-unit #xe000)) 1354 (code-char 1st-unit) 1355 (if (< 1st-unit #xdc00) 1356 (let* ((2nd-unit (%get-unsigned-byte pointer (incf p 2)))) 1357 (declare (type (unsigned-byte 16) 2nd-unit)) 1358 (incf index) 1359 (if (and (>= 2nd-unit #xdc00) 1360 (< 2nd-unit #xe000)) 1361 (code-char (the (unsigned-byte 21) 1362 (logior 1363 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1364 (- 1st-unit #xd800)) 1365 10)) 1366 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1367 (if char 1368 (setf (schar string i) char) 1369 (return (values nil idx)))))))) 1333 1370 :units-in-string-function 1334 1371 #'utf-16-units-in-string … … 1343 1380 (if (= i end) nchars)) 1344 1381 (let* ((code (aref vector i))) 1345 (declare (type (unsigned-byte 8) code))1382 (declare (type (unsigned-byte 16) code)) 1346 1383 (incf i 1347 1384 (if (or (< code #xd800) … … 1351 1388 :length-of-memory-encoding-function 1352 1389 (nfunction 1353 native-utf- 8-length-of-memory-encoding1390 native-utf-16-length-of-memory-encoding 1354 1391 (lambda (pointer nunits &optional (start 0)) 1355 1392 (do* ((i start) … … 1382 1419 (nfunction 1383 1420 reversed-utf-16-vector-encode 1384 (lambda ( char vector index)1421 (lambda (string vector idx &optional (start 0) (end (length string))) 1385 1422 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1386 (type index index) 1387 (optimize (speed 3) (safety 0))) 1388 (let* ((len (length vector)) 1389 (code (char-code char)) 1390 (highbits (- code #x10000))) 1391 (declare (type index len) 1392 (type (mod #x110000) code) 1393 (fixnum highbits)) 1394 (if (< highbits 0) 1395 (when (< index len) 1396 (setf (aref vector index) (%swap-u16 code)) 1397 (the fixnum (+ index 1))) 1398 (let* ((i1 (1+ index))) 1399 (declare (fixnum i1)) 1400 (when (< i1 len) 1401 (setf (aref vector index) 1402 (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10)))) 1403 (aref vector i1) 1404 (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1405 (the fixnum (1+ i1)))))))) 1423 (fixnum idx)) 1424 (do* ((i start (1+ i))) 1425 ((>= i end) idx) 1426 (let* ((char (schar string i)) 1427 (code (char-code char)) 1428 (highbits (- code #x10000))) 1429 (declare (type (mod #x110000) code) 1430 (fixnum highbits)) 1431 (cond ((< highbits 0) 1432 (setf (aref vector idx) (%swap-u16 code)) 1433 (incf idx)) 1434 (t 1435 (setf (aref vector idx) (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10))))) 1436 (incf idx) 1437 (setf (aref vector idx) (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1438 (incf idx))))))) 1406 1439 :vector-decode-function 1407 1440 (nfunction 1408 1441 reversed-utf-16-vector-decode 1409 (lambda (vector idx )1442 (lambda (vector idx nunits string) 1410 1443 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1411 1444 (type index idx)) 1412 (let* ((len (length vector))) 1413 (declare (fixnum len)) 1414 (if (>= idx len) 1445 (do* ((i 0 (1+ i)) 1446 (len (length vector)) 1447 (index idx (1+ index))) 1448 ((>= i nunits) (values string index)) 1449 (declare (fixnum i len index)) 1450 (if (>= index len) 1415 1451 (values nil idx) 1416 (let* ((1st-unit (%swap-u16 (aref vector i dx))))1452 (let* ((1st-unit (%swap-u16 (aref vector index)))) 1417 1453 (declare (type (unsigned-byte 16) 1st-unit)) 1418 (if (or (< 1st-unit #xd800) 1419 (>= 1st-unit #xe000)) 1420 (values (code-char 1st-unit) 1421 (the fixnum (1+ idx))) 1422 (if (>= 1st-unit #xdc00) 1423 (values nil idx) 1424 (let* ((i1 (1+ idx))) 1425 (declare (fixnum i1)) 1426 (if (>= i1 len) 1427 (values nil idx) 1428 (let* ((2nd-unit (%swap-u16 (aref vector i1)))) 1429 (declare (type (unsigned-byte 16) 2nd-unit)) 1430 (if (and (>= 2nd-unit #xdc00) 1431 (< 2nd-unit #xe000)) 1432 (values 1433 (code-char (the (unsigned-byte 21) 1434 (logior 1435 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1436 (- 1st-unit #xd800)) 1437 10)) 1438 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))) 1439 (the fixnum (1+ i1))) 1440 (values nil idx)))))))))))) 1454 (let* ((char 1455 (if (or (< 1st-unit #xd800) 1456 (>= 1st-unit #xe000)) 1457 (code-char 1st-unit) 1458 (if (< 1st-unit #xdc00) 1459 (let* ((2nd-unit (%swap-u16 (aref vector (incf index))))) 1460 (declare (type (unsigned-byte 16) 2nd-unit)) 1461 (if (and (>= 2nd-unit #xdc00) 1462 (< 2nd-unit #xe000)) 1463 (code-char (the (unsigned-byte 21) 1464 (logior 1465 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1466 (- 1st-unit #xd800)) 1467 10)) 1468 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1469 (if char 1470 (setf (schar string i) char) 1471 (return (values nil idx))))))))) 1441 1472 :memory-encode-function 1442 1473 (nfunction 1443 1474 reversed-utf-16-memory-encode 1444 (lambda ( char pointer idx)1475 (lambda (string pointer idx &optional (start 0) (end (length string))) 1445 1476 (declare (fixnum idx)) 1446 (let* ((code (char-code char)) 1447 (highbits (- code #x10000)) 1448 (i0 (+ idx idx)) 1449 (i1 (+ i0 2))) 1450 (declare (type (mod #x110000) code) 1451 (fixnum i0 i1 highbits)) 1477 (do* ((i start (1+ i))) 1478 ((>= i end) idx) 1479 (let* ((code (char-code (schar string i))) 1480 (highbits (- code #x10000)) 1481 (p (+ idx idx))) 1482 (declare (type (mod #x110000) code) 1483 (fixnum p highbits)) 1452 1484 (cond ((< highbits 0) 1453 (setf (%get-unsigned-word pointer i0) (%swap-u16 code)) 1454 (the fixnum (1+ idx))) 1485 (setf (%get-unsigned-word pointer p) (%swap-u16 code)) 1486 (incf idx) 1487 (incf p 2)) 1488 1455 1489 (t 1456 (setf (%get-unsigned-word pointer i0) 1457 (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10)))) 1458 (%get-unsigned-word pointer i1) 1459 (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1460 (the fixnum (+ idx 2))))))) 1490 (setf (%get-unsigned-word pointer p) (%swap-u16 (logior #xd800 (the fixnum (ash highbits -10))))) 1491 (incf idx) 1492 (incf p 2) 1493 (setf (%get-unsigned-word pointer p) (%swap-u16 (logior #xdc00 (the fixnum (logand highbits #x3ff))))) 1494 (incf idx) 1495 (incf p 2))))))) 1461 1496 :memory-decode-function 1462 1497 (nfunction 1463 1498 reversed-utf-16-memory-decode 1464 (lambda (pointer idx) 1465 (declare (fixnum idx)) 1466 (let* ((i0 (+ idx idx)) 1467 (1st-unit (%swap-u16 (%get-unsigned-word pointer i0))) 1468 (i1 (+ i0 2))) 1469 (declare (type (unsigned-byte 16) 1st-unit) 1470 (fixnum i1 i2 i3)) 1471 (if (or (< 1st-unit #xd800) 1472 (>= 1st-unit #xe000)) 1473 (values (code-char 1st-unit) (the fixnum (1+ idx))) 1474 (if (< 1st-unit #xdc00) 1475 (let* ((2nd-unit (%swap-u16 (%get-unsigned-word pointer i1)))) 1476 (declare (type (unsigned-byte 16) 2nd-unit)) 1477 (if (and (>= 2nd-unit #xdc00) 1478 (< 2nd-unit #xe000)) 1479 (values 1480 (code-char (the (unsigned-byte 21) 1481 (logior 1482 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1483 (- 1st-unit #xd800)) 1484 10)) 1485 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))) 1486 (the fixnum (+ idx 2)))))))))) 1499 (lambda (pointer nunits idx string) 1500 (declare (fixnum nunits idx)) 1501 (do* ((i 0 (1+ i)) 1502 (index idx (1+ index)) 1503 (p (+ index index) (+ p 2))) 1504 ((>= i nunits) (values string index)) 1505 (declare (fixnum i index p)) 1506 (let* ((1st-unit (%swap-u16 (%get-unsigned-word pointer p)))) 1507 (declare (type (unsigned-byte 16) 1st-unit)) 1508 (let* ((char 1509 (if (or (< 1st-unit #xd800) 1510 (>= 1st-unit #xe000)) 1511 (code-char 1st-unit) 1512 (if (< 1st-unit #xdc00) 1513 (let* ((2nd-unit (%swap-u16 (%get-unsigned-byte pointer (incf p 2))))) 1514 (declare (type (unsigned-byte 16) 2nd-unit)) 1515 (incf index) 1516 (if (and (>= 2nd-unit #xdc00) 1517 (< 2nd-unit #xe000)) 1518 (code-char (the (unsigned-byte 21) 1519 (logior 1520 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1521 (- 1st-unit #xd800)) 1522 10)) 1523 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1524 (if char 1525 (setf (schar string i) char) 1526 (return (values nil idx)))))))) 1487 1527 :units-in-string-function 1488 1528 #'utf-16-units-in-string … … 1497 1537 (if (= i end) nchars)) 1498 1538 (let* ((code (%swap-u16 (aref vector i)))) 1499 (declare (type (unsigned-byte 8) code))1539 (declare (type (unsigned-byte 16) code)) 1500 1540 (incf i 1501 1541 (if (or (< code #xd800) … … 1505 1545 :length-of-memory-encoding-function 1506 1546 (nfunction 1507 reversed-utf- 8-length-of-memory-encoding1547 reversed-utf-16-length-of-memory-encoding 1508 1548 (lambda (pointer nunits &optional (start 0)) 1509 1549 (do* ((i start) … … 1523 1563 ) 1524 1564 1525 ;;; UTF-16. 1565 ;;; UTF-16. Memory and vector functions determine endianness of 1566 ;;; input by the presence of a byte-order mark (or swapped BOM) 1567 ;;; at the beginning of input, and assume big-endian order 1568 ;;; if this mark is missing; on output, a BOM is prepended and 1569 ;;; things are written in native byte order. 1570 ;;; The endianness of stream-io operations is determined by 1571 ;;; stream content; new output streams are written in native 1572 ;;; endianness with a BOM character prepended. Input streams 1573 ;;; are read in native byte order if the initial character is 1574 ;;; a BOM, in reversed byte order if the initial character is 1575 ;;; a swapped BOM, and in big-endian order (per RFC 2781) if 1576 ;;; there is no BOM. 1577 1578 (define-character-encoding 1579 :utf-16 1580 :max-units-per-char 2 1581 :code-unit-size 16 1582 :native-endianness t ;not necessarily true. 1583 :stream-encode-function 1584 #'utf-16-stream-encode 1585 :stream-decode-function 1586 #'utf-16-stream-decode 1587 :vector-encode-function 1588 (nfunction 1589 utf-16-vector-encode 1590 (lambda (string vector idx &optional (start 0) (end (length string))) 1591 (declare (type (simple-array (unsigned-byte 8) (*)) vector) 1592 (fixnum idx)) 1593 (when (> end start) 1594 (setf (aref vector idx) byte-order-mark-char-code) 1595 (incf idx)) 1596 (do* ((i start (1+ i))) 1597 ((>= i end) idx) 1598 (let* ((char (schar string i)) 1599 (code (char-code char)) 1600 (highbits (- code #x10000))) 1601 (declare (type (mod #x110000) code) 1602 (fixnum highbits)) 1603 (cond ((< highbits 0) 1604 (setf (aref vector idx) code) 1605 (incf idx)) 1606 (t 1607 (setf (aref vector idx) (logior #xd800 (the fixnum (ash highbits -10)))) 1608 (incf idx) 1609 (setf (aref vector idx) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1610 (incf idx))))))) 1611 :vector-decode-function 1612 (nfunction 1613 utf-16-vector-decode 1614 (lambda (vector idx nunits string) 1615 (declare (type (simple-array (unsigned-byte 16) (*)) vector) 1616 (type index idx)) 1617 (let* ((len (length vector)) 1618 (swap (if (> len idx) 1619 (case (aref vector idx) 1620 (#.byte-order-mark-char-code 1621 (incf idx) nil) 1622 (#.swapped-byte-order-mark-char-code 1623 (incf idx t)) 1624 (t #+little-endian-target t))))) 1625 1626 (do* ((i 0 (1+ i)) 1627 (index idx (1+ index))) 1628 ((>= i nunits) (values string index)) 1629 (declare (fixnum i len index)) 1630 (if (>= index len) 1631 (values nil idx) 1632 (let* ((1st-unit (aref vector index))) 1633 (declare (type (unsigned-byte 16) 1st-unit)) 1634 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 1635 (let* ((char 1636 (if (or (< 1st-unit #xd800) 1637 (>= 1st-unit #xe000)) 1638 (code-char 1st-unit) 1639 (if (< 1st-unit #xdc00) 1640 (let* ((2nd-unit (aref vector (incf index)))) 1641 (declare (type (unsigned-byte 16) 2nd-unit)) 1642 (if swap (setq 2nd-unit (%swap-u16 2nd-unit))) 1643 (if (and (>= 2nd-unit #xdc00) 1644 (< 2nd-unit #xe000)) 1645 (code-char (the (unsigned-byte 21) 1646 (logior 1647 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1648 (- 1st-unit #xd800)) 1649 10)) 1650 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1651 (if char 1652 (setf (schar string i) char) 1653 (return (values nil idx)))))))))) 1654 :memory-encode-function 1655 (nfunction 1656 utf-16-memory-encode 1657 (lambda (string pointer idx &optional (start 0) (end (length string))) 1658 (declare (fixnum idx)) 1659 (when (> end start) 1660 (setf (%get-unsigned-word pointer (+ idx idx)) 1661 byte-order-mark-char-code) 1662 (incf idx)) 1663 (do* ((i start (1+ i))) 1664 ((>= i end) idx) 1665 (let* ((code (char-code (schar string i))) 1666 (highbits (- code #x10000)) 1667 (p (+ idx idx))) 1668 (declare (type (mod #x110000) code) 1669 (fixnum p highbits)) 1670 (cond ((< highbits 0) 1671 (setf (%get-unsigned-word pointer p) code) 1672 (incf idx) 1673 (incf p 2)) 1674 1675 (t 1676 (setf (%get-unsigned-word pointer p) (logior #xd800 (the fixnum (ash highbits -10)))) 1677 (incf idx) 1678 (incf p 2) 1679 (setf (%get-unsigned-word pointer p) (logior #xdc00 (the fixnum (logand highbits #x3ff)))) 1680 (incf idx) 1681 (incf p 2))))))) 1682 :memory-decode-function 1683 (nfunction 1684 utf-16-memory-decode 1685 (lambda (pointer nunits idx string) 1686 (declare (fixnum nunits idx)) 1687 (let* ((swap (when (> nunits 0) 1688 (case (%get-unsigned-word pointer (+ idx idx)) 1689 (#.byte-order-mark-char-code 1690 (incf idx) 1691 (decf nunits) 1692 nil) 1693 (#.swapped-byte-order-mark-char-code 1694 (incf idx) 1695 (decf nunits) 1696 t) 1697 (t #+little-endian-target t))))) 1698 (do* ((i 0 (1+ i)) 1699 (index idx (1+ index)) 1700 (p (+ index index) (+ p 2))) 1701 ((>= i nunits) (values string index)) 1702 (declare (fixnum i index p)) 1703 (let* ((1st-unit (%get-unsigned-word pointer p))) 1704 (declare (type (unsigned-byte 16) 1st-unit)) 1705 (if swap (setq 1st-unit (%swap-u16 1st-unit))) 1706 (let* ((char 1707 (if (or (< 1st-unit #xd800) 1708 (>= 1st-unit #xe000)) 1709 (code-char 1st-unit) 1710 (if (< 1st-unit #xdc00) 1711 (let* ((2nd-unit (%get-unsigned-byte pointer (incf p 2)))) 1712 (declare (type (unsigned-byte 16) 2nd-unit)) 1713 (if swap (setq 2nd-unit (%swap-u16 2nd-unit))) 1714 (incf index) 1715 (if (and (>= 2nd-unit #xdc00) 1716 (< 2nd-unit #xe000)) 1717 (code-char (the (unsigned-byte 21) 1718 (logior 1719 (the (unsigned-byte 20) (ash (the (unsigned-byte 10) 1720 (- 1st-unit #xd800)) 1721 10)) 1722 (the (unsigned-byte 10) (- 2nd-unit #xdc00))))))))))) 1723 (if char 1724 (setf (schar string i) char) 1725 (return (values nil idx))))))))) 1726 :units-in-string-function 1727 ;; Note that this does -not- include the BOM. 1728 #'utf-16-units-in-string 1729 :length-of-vector-encoding-function 1730 (nfunction 1731 utf-16-length-of-vector-encoding 1732 (lambda (vector &optional (start 0) (end (length vector))) 1733 (declare (type (simple-array (unsigned-byte 16) (*)) vector)) 1734 (let* ((swap (when (> end start) 1735 (case (aref vector start) 1736 (#.byte-order-mark-char-code 1737 (incf start) 1738 nil) 1739 (#.swapped-byte-order-mark-char-code 1740 (incf start) 1741 t) 1742 (t #+little-endian-target t))))) 1743 (do* ((i start) 1744 (nchars 0 (1+ nchars))) 1745 ((>= i end) 1746 (if (= i end) nchars)) 1747 (let* ((code (aref vector i))) 1748 (declare (type (unsigned-byte 16) code)) 1749 (if swap (setq code (%swap-u16 code))) 1750 (incf i 1751 (if (or (< code #xd800) 1752 (>= code #xe000)) 1753 1 1754 2))))))) 1755 :length-of-memory-encoding-function 1756 (nfunction 1757 utf-16-length-of-memory-encoding 1758 (lambda (pointer nunits &optional (start 0)) 1759 (let* ((swap (when (> nunits 1) 1760 (case (%get-unsigned-word pointer (+ start start)) 1761 (#.byte-order-mark-char-code 1762 (incf start) 1763 (decf nunits) 1764 nil) 1765 (#.swapped-byte-order-mark-char-code 1766 (incf start) 1767 (decf nunits) 1768 t) 1769 (t #+little-endian-target t))))) 1770 (do* ((i start) 1771 (p (+ start start) (+ p 2)) 1772 (nchars 0 (1+ nchars))) 1773 ((>= i nunits) 1774 (if (= i nunits) nchars)) 1775 (let* ((code (%get-unsigned-word pointer p))) 1776 (declare (type (unsigned-byte 16) code)) 1777 (if swap (setq code (%swap-u16 code))) 1778 (incf i 1779 (incf i 1780 (if (or (< code #xd800) 1781 (>= code #xe000)) 1782 1 1783 2)))))))) 1784 :literal-char-code-limit #x10000 1785 :use-byte-order-mark 1786 #+big-endian-target :utf-16le 1787 #+little-endian-target :utf-16be 1788 )
Note:
See TracChangeset
for help on using the changeset viewer.
