Changeset 9430


Ignore:
Timestamp:
May 9, 2008, 10:15:50 AM (11 years ago)
Author:
gb
Message:

Try to make STRING-EQUAL faster in general and faster in the case where
some keyword args were provided.

Use CHECK-SEQUENCE-BOUNDS on lots of string functions; :START defaults
to 0 more consistently.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/working-0711-perf/ccl/lib/chars.lisp

    r9363 r9430  
    8181
    8282
    83 (defun %non-standard-lower-case-equivalent (char)
    84   (gethash char *non-standard-upper-to-lower*))
     83
    8584
    8685
     
    303302
    304303
    305 (defun string-downcase (string &key start end)
     304(defun string-downcase (string &key (start 0) end)
    306305  (setq string (copy-string-arg string))
    307   (if (not start) (setq start 0)(require-type start 'fixnum))
    308   (if (not end)(setq end (length string))(require-type end 'fixnum))
     306  (setq end (check-sequence-bounds string start end))
    309307  (%strdown string start end))
    310308
     
    326324                    (if (>= code #x80)
    327325                      (%non-standard-lower-case-equivalent ch)))))
    328       (declare (character ch) (type (mod #x11000) code))
     326      (declare (character ch) (type (mod #x110000) code))
    329327      (when lower
    330328        (setf (schar string i) lower)))))
     
    346344  (%substr string org (+ len org)))     
    347345
    348 (defun string-upcase (string &key start end)
     346(defun string-upcase (string &key (start 0) end)
    349347  (setq string (copy-string-arg string))
    350   (if (not start) (setq start 0)(require-type start 'fixnum))
    351   (if (not end)(setq end (length string))(require-type end 'fixnum))
     348  (setq end (check-sequence-bounds string start end))
    352349  (%strup string start end))
    353350
     
    368365                    (if (>= code #x80)
    369366                      (%non-standard-upper-case-equivalent ch)))))
    370       (declare (character ch) (type (mod #x11000) code))
     367      (declare (character ch) (type (mod #x110000) code))
    371368      (when upper
    372369        (setf (schar string i) upper)))))
     
    374371
    375372
    376 (defun string-capitalize (string &key start end)
     373(defun string-capitalize (string &key (start 0) end)
    377374  (setq string (copy-string-arg string))
    378   (if (not start) (setq start 0)(require-type start 'fixnum))
    379   (if (not end)(setq end (length string))(require-type end 'fixnum))
     375  (setq end (check-sequence-bounds string start end))
    380376  (%strcap string start end))
    381377
     
    402398
    403399
    404 (defun nstring-downcase (string &key start end)
     400(defun nstring-downcase (string &key (start 0) end)
    405401  (etypecase string
    406402    (string
    407      (if (not start) (setq start 0)(require-type start 'fixnum))
    408      (if (not end)(setq end (length string))(require-type end 'fixnum))
    409      (multiple-value-bind (sstring org) (array-data-and-offset string)
    410        (%strdown sstring (+ start org)(+ end org)))
     403     (setq end (check-sequence-bounds string start end))
     404     (if (typep string 'simple-string)
     405       (%strdown string start end)
     406       (multiple-value-bind (data offset) (array-data-and-offset string)
     407         (%strdown data (+ start offset) (+ end offset))))
    411408     string)))
    412409
    413 (defun nstring-upcase (string &key start end)
     410(defun nstring-upcase (string &key (start 0) end)
    414411  (etypecase string
    415412    (string
    416      (if (not start) (setq start 0)(require-type start 'fixnum))
    417      (if (not end)(setq end (length string))(require-type end 'fixnum))
    418      (multiple-value-bind (sstring org) (array-data-and-offset string)
    419        (%strup sstring (+ start org)(+ end org)))
     413     (setq end (check-sequence-bounds string start end))
     414     (if (typep string 'simple-string)
     415       (%strup string start end)
     416       (multiple-value-bind (data offset) (array-data-and-offset string)
     417         (%strup data (+ start offset) (+ end offset))))
    420418     string)))
    421419
    422420
    423 (defun nstring-capitalize (string &key start end)
     421(defun nstring-capitalize (string &key (start 0) end)
    424422  (etypecase string
    425423    (string
    426      (if (not start) (setq start 0)(require-type start 'fixnum))
    427      (if (not end)(setq end (length string))(require-type end 'fixnum))
    428      (multiple-value-bind (sstring org) (array-data-and-offset string)
    429        (%strcap sstring (+ start org)(+ end org)))
     424     (setq end (check-sequence-bounds string start end))
     425     (if (typep string 'simple-string)
     426       (%strcap string start end)
     427       (multiple-value-bind (data offset) (array-data-and-offset string)
     428         (%strcap data (+ start offset) (+ end offset))))
    430429     string)))
    431430
     
    503502    (if (eq result -1) nil pos)))
    504503
     504(declaim (inline %string-start-end))
     505(defun %string-start-end (string)
     506  (etypecase string
     507    (string (multiple-value-bind (data offset)
     508                (array-data-and-offset string)
     509              (declare (fixnum offset))
     510              (values data offset (+ offset (length string)))))
     511    (symbol (let* ((pname (symbol-name string)))
     512              (values pname 0 (length pname))))
     513    (character (let* ((data (make-string 1)))
     514                 (setf (schar data 0) string)
     515                 (values data 0 1)))))
     516                       
     517;;; This is generally a bit faster then the version that deals with
     518;;; user-supplied bounds, both because the caller avoids passing
     519;;; some extra arguments and because those bounds don't need to be
     520;;; validated.
     521(defun %fixed-string-equal (string1 string2)
     522  (let* ((start1 0)
     523         (end1 0)
     524         (start2 0)
     525         (end2 0))
     526    (declare (fixnum start1 end1 start2 end2))
     527    (if (typep string1 'simple-string)
     528      (setq end1 (uvsize string1))
     529      (multiple-value-setq (string1 start1 end1)
     530        (%string-start-end string1)))
     531    (if (typep string2 'simple-string)
     532      (setq end2 (uvsize string2))
     533      (multiple-value-setq (string2 start2 end2)
     534        (%string-start-end string2)))
     535    (locally
     536        (declare (optimize (speed 3)(safety 0))
     537                 (simple-string string1 string2))
     538      (when (= (the fixnum (- end1 start1))
     539               (the fixnum (- end2 start2)))
     540        (do* ((i start1 (1+ i))
     541              (j start2 (1+ j)))
     542             ((= i end1) t)
     543          (declare (fixnum i j))
     544          (let ((code1 (%scharcode string1 i))
     545                (code2 (%scharcode string2 j)))
     546            (declare (type (mod #x110000) code1 code2))
     547            (unless (= code1 code2)
     548              (if (and (>= code1 (char-code #\a))
     549                       (<= code1 (char-code #\z)))
     550                (setq code1 (- code1 (- (char-code #\a) (char-code #\A))))
     551                (if (> code1 #x80)
     552                  (setq code1 (%non-standard-char-code-upcase code1))))
     553              (if (and (>= code2 (char-code #\a))
     554                       (<= code2 (char-code #\z)))
     555                (setq code2 (- code2 (- (char-code #\a) (char-code #\A))))
     556                (if (> code2 #x80)
     557                  (setq code2 (%non-standard-char-code-upcase code2))))
     558              (unless (= code1 code2)
     559                (return)))))))))
     560
     561;;; Some of the start1/end1/start2/end2 args may be bogus.
     562(defun %bounded-string-equal (string1 string2 start1 end1 start2 end2)
     563  (let* ((disp1 nil)
     564         (len1 0)
     565         (disp2 nil)
     566         (len2 0))
     567    (declare (fixnum len1 len2))
     568    (if (typep string1 'simple-string)
     569      (setq len1 (length (the sumple-string string1)))
     570      (etypecase string1
     571        (string (setq len1 (length string1))
     572                (multiple-value-setq (string1 disp1)
     573                  (array-data-and-offset string1)))
     574        (symbol (setq string1 (symbol-name string1)
     575                      len1 (length (the simple-string string1))))
     576        (character (setq string1 (make-string 1 :initial-element string1)
     577                         len1 1))))
     578    (if (typep string2 'simple-string)
     579      (setq len2 (length (the sumple-string string2)))
     580      (etypecase string2
     581        (string (setq len2 (length string2))
     582                (multiple-value-setq (string2 disp2)
     583                  (array-data-and-offset string2)))
     584        (symbol (setq string2 (symbol-name string2)
     585                      len1 (length (the simple-string string2))))
     586        (character (setq string2 (make-string 1 :initial-element string2)
     587                         len1 1))))
     588    (flet ((bad-index (index vector) (error "Index ~s is invalid for ~s" index vector)))
     589      (if (null start1)
     590        (setq start1 0)
     591        (when (or (not (typep start1 'fixnum))
     592                  (< (the fixnum start1) 0))
     593          (bad-index start1 string1)))
     594      (if (null end1)
     595        (setq end1 len1)
     596        (when (or (not (typep end1 'fixnum))
     597                  (< (the fixnum end1) 0)
     598                  (> (the fixnum end1) len1)
     599                  (bad-index end1 string1))))
     600      (locally (declare (fixnum start1 end1))
     601        (if (> start1 end1)
     602          (error ":start1 argument ~s exceeds :end1 argument ~s" start1 end1))
     603        (when disp1
     604          (incf start1 (the fixnum disp1))
     605          (incf end1 (the fixnum disp1)))
     606        (if (null start2)
     607          (setq start2 0)
     608          (when (or (not (typep start2 'fixnum))
     609                    (< (the fixnum start2) 0))
     610            (bad-index start2 string2)))
     611        (if (null end2)
     612          (setq end2 len2)
     613          (when (or (not (typep end2 'fixnum))
     614                    (< (the fixnum end2) 0)
     615                    (> (the fixnum end2) len2)
     616                    (bad-index end2 string2))))
     617        (locally (declare (fixnum start2 end2))
     618          (if (> start2 end2)
     619            (error ":start2 argument ~s exceeds :end2 argument ~s" start1 end1))
     620          (when disp2
     621            (incf start2 (the fixnum disp2))
     622            (incf end2 (the fixnum disp2)))
     623          (locally
     624              (declare (optimize (speed 3)(safety 0))
     625                       (simple-string string1 string2))
     626            (when (= (the fixnum (- end1 start1))
     627                     (the fixnum (- end2 start2)))
     628              (do* ((i start1 (1+ i))
     629                    (j start2 (1+ j)))
     630                   ((= i end1) t)
     631                (declare (fixnum i j))
     632                (let ((code1 (%scharcode string1 i))
     633                      (code2 (%scharcode string2 j)))
     634                  (declare (type (mod #x110000) code1 code2))
     635                  (unless (= code1 code2)
     636                    (if (and (>= code1 (char-code #\a))
     637                             (<= code1 (char-code #\z)))
     638                      (setq code1 (- code1 (- (char-code #\a) (char-code #\A))))
     639                      (if (> code1 #x80)
     640                        (setq code1 (%non-standard-char-code-upcase code1))))
     641                    (if (and (>= code2 (char-code #\a))
     642                             (<= code2 (char-code #\z)))
     643                      (setq code2 (- code2 (- (char-code #\a) (char-code #\A))))
     644                      (if (> code2 #x80)
     645                        (setq code2 (%non-standard-char-code-upcase code2))))
     646                    (unless (= code1 code2)
     647                      (return))))))))))))
     648
    505649(defun string-equal (string1 string2 &key start1 end1 start2 end2)
    506650  "Given two strings (string1 and string2), and optional integers start1,
    507651  start2, end1 and end2, compares characters in string1 to characters in
    508652  string2 (using char-equal)."
    509   (%string-equal (if (and (typep string1 'simple-string) (null start1) (null end1))
    510                    (progn
    511                      (setq start1 0 end1 (length string1))
    512                      string1)
    513                    (multiple-value-setq (string1 start1 end1)
    514                      (string-start-end string1 start1 end1)))
    515                  start1 end1
    516                  (if (and (typep string2 'simple-string) (null start2) (null end2))
    517                    (progn
    518                      (setq start2 0 end2 (length string2))
    519                      string2)
    520                    (multiple-value-setq (string2 start2 end2)
    521                      (string-start-end string2 start2 end2)))
    522                  start2 end2))
    523 
    524 ;; For use by optimizer
    525 (defun string-equal-no-keys (string1 string2 &aux start1 end1  start2 end2)
    526   (if (typep string1 'simple-string)
    527     (setq start1 0 end1 (length string1))
    528     (multiple-value-setq (string1 start1 end1)
    529       (string-start-end string1 start1 end1)))
    530   (if (typep string2 'simple-string)
    531     (setq start2 0 end2 (length string2))
    532     (multiple-value-setq (string2 start2 end2)
    533       (string-start-end string2 start2 end2)))
    534   (and (eq (%i- end1 start1) (%i- end2 start2))
    535        (%string-equal string1 start1 end1 string2 start2 end2)))
    536 
    537 (defun %string-equal (simple-string-1 start1 end1 simple-string-2 start2 end2)
    538   (declare (type simple-string simple-string-1 simple-string-2)
    539            (type fixnum start1 end1 start2 end2)
    540            (optimize (speed 3) (safety 0) (debug 0)))
    541   (let* ((len1 (- end1 start1))
    542          (len2 (- end2 start2)))
    543     (declare (type fixnum len1 len2))
    544     (when (/= len1 len2)
    545       (return-from %string-equal nil))
    546     (macrolet ((.char-equal (a b)
    547                  ;; not a typo. it's faster to make the char= check and, only if that fails, run the
    548                  ;; char-equal check.
    549                  `(or (char= ,a ,b) (char-equal ,a ,b)))
    550                (s1 (index)
    551                  `(uvref simple-string-1 ,index))
    552                (s2 (index)
    553                  `(uvref simple-string-2 ,index)))
    554      
    555       (do* ((index1 start1 (%i+ 1 index1))
    556             (index2 start2 (%i+ 1 index2))
    557             (char1  (s1 index1) (s1 index1))
    558             (char2  (s2 index2) (s2 index2)))
    559           ()
    560         (declare (type fixnum index1 index2)
    561                  (type character char1 char2))
    562         (when (eq index1 end1)
    563           (return-from %string-equal t))
    564         (when (not (.char-equal char1 char2))
    565           (return-from %string-equal nil))))))
     653  (if (or start1 end1 start2 end2)
     654    (%bounded-string-equal string1 string2 start1 end1 start2 end2)
     655    (%fixed-string-equal string1 string2)))
     656
     657
    566658
    567659(defun string-lessp (string1 string2 &key start1 end1 start2 end2)
     
    572664    (if (eq result -1) pos nil)))
    573665
    574 ; forget script-manager - just do codes
     666;;; forget script-manager - just do codes
    575667(defun string-cmp (string1 start1 end1 string2 start2 end2)
    576668  (let ((istart1 (or start1 0)))
Note: See TracChangeset for help on using the changeset viewer.