| [6] | 1 | ;;; -*- Log: hemlock.log; Package: hemlock-internals -*-
|
|---|
| 2 | ;;;
|
|---|
| 3 | ;;; **********************************************************************
|
|---|
| 4 | ;;; This code was written as part of the CMU Common Lisp project at
|
|---|
| 5 | ;;; Carnegie Mellon University, and has been placed in the public domain.
|
|---|
| 6 | ;;;
|
|---|
| 7 | #+CMU (ext:file-comment
|
|---|
| 8 | "$Header$")
|
|---|
| 9 | ;;;
|
|---|
| 10 | ;;; **********************************************************************
|
|---|
| 11 | ;;;
|
|---|
| 12 | ;;; Reluctantly written by Christopher Hoover
|
|---|
| 13 | ;;; Supporting cast includes Rob and Bill.
|
|---|
| 14 | ;;;
|
|---|
| 15 | ;;; This file defines a data structure, analogous to a Common Lisp
|
|---|
| 16 | ;;; hashtable, which translates strings to values and facilitates
|
|---|
| 17 | ;;; recognition and completion of these strings.
|
|---|
| 18 | ;;;
|
|---|
| 19 |
|
|---|
| 20 | (in-package :hemlock-internals)
|
|---|
| 21 |
|
|---|
| 22 | |
|---|
| 23 |
|
|---|
| 24 | ;;;; Implementation Details
|
|---|
| 25 |
|
|---|
| 26 | ;;; String tables are a data structure somewhat analogous to Common Lisp
|
|---|
| 27 | ;;; hashtables. String tables are case-insensitive. Functions are
|
|---|
| 28 | ;;; provided to quickly look up strings, insert strings, disambiguate or
|
|---|
| 29 | ;;; complete strings, and to provide a variety of ``help'' when
|
|---|
| 30 | ;;; disambiguating or completing strings.
|
|---|
| 31 | ;;;
|
|---|
| 32 | ;;; String tables are represented as a series of word tables which form
|
|---|
| 33 | ;;; a tree. Four structures are used to implement this data structure.
|
|---|
| 34 | ;;; The first is a STRING-TABLE. This structure has severals slots one
|
|---|
| 35 | ;;; of which, FIRST-WORD-TABLE, points to the first word table. This
|
|---|
| 36 | ;;; first word table is also the root of tree. The STRING-TABLE
|
|---|
| 37 | ;;; structure also contains slots to keep track of the number of nodes,
|
|---|
| 38 | ;;; the string table separator (which is used to distinguish word or
|
|---|
| 39 | ;;; field boundaries), and a pointer to an array of VALUE-NODE's.
|
|---|
| 40 | ;;;
|
|---|
| 41 | ;;; A WORD-TABLE is simply an array of pointers to WORD-ENTRY's. This
|
|---|
| 42 | ;;; array is kept sorted by the FOLDED slot in each WORD-ENTRY so that a
|
|---|
| 43 | ;;; binary search can be used. Each WORD-ENTRY contains a case-folded
|
|---|
| 44 | ;;; string and a pointer to the next WORD-TABLE in the tree. By
|
|---|
| 45 | ;;; traversing the tree made up by these structures, searching and
|
|---|
| 46 | ;;; completion can easily be done.
|
|---|
| 47 | ;;;
|
|---|
| 48 | ;;; Another structure, a VALUE-NODE, is used to hold each entry in the
|
|---|
| 49 | ;;; string table and contains both a copy of the original string and a
|
|---|
| 50 | ;;; case-folded version of the original string along with the value.
|
|---|
| 51 | ;;; All of these value nodes are stored in a array (pointed at by the
|
|---|
| 52 | ;;; VALUE-NODES slot of the STRING-TABLE structure) and sorted by the
|
|---|
| 53 | ;;; FOLDED slot in the VALUE-NODE structure so that a binary search may
|
|---|
| 54 | ;;; be used to quickly find existing strings.
|
|---|
| 55 | ;;;
|
|---|
| 56 |
|
|---|
| 57 | |
|---|
| 58 |
|
|---|
| 59 | ;;;; Structure Definitions
|
|---|
| 60 |
|
|---|
| 61 | (defparameter initial-string-table-size 20
|
|---|
| 62 | "Initial size of string table array for value nodes.")
|
|---|
| 63 | (defparameter initial-word-table-size 2
|
|---|
| 64 | "Inital size of each word table array for each tree node.")
|
|---|
| 65 |
|
|---|
| 66 | (defstruct (string-table
|
|---|
| 67 | (:constructor %make-string-table (separator))
|
|---|
| 68 | (:print-function print-string-table))
|
|---|
| 69 | "This structure is used to implement the Hemlock string-table type."
|
|---|
| 70 | ;; Character used to
|
|---|
| 71 | (separator #\Space :type base-char) ; character used for word separator
|
|---|
| 72 | (num-nodes 0 :type fixnum) ; number of nodes in string table
|
|---|
| 73 | (value-nodes (make-array initial-string-table-size)) ; value node array
|
|---|
| 74 | (first-word-table (make-word-table))) ; pointer to first WORD-TABLE
|
|---|
| 75 |
|
|---|
| 76 | (defun print-string-table (table stream depth)
|
|---|
| 77 | (declare (ignore table depth))
|
|---|
| 78 | (format stream "#<String Table>"))
|
|---|
| 79 |
|
|---|
| 80 | (defun make-string-table (&key (separator #\Space) initial-contents)
|
|---|
| 81 | "Creates and returns a Hemlock string-table. If Intitial-Contents is
|
|---|
| 82 | supplied in the form of an A-list of string-value pairs, these pairs
|
|---|
| 83 | will be used to initialize the table. If Separator, which must be a
|
|---|
| 84 | base-char, is specified then it will be used to distinguish word
|
|---|
| 85 | boundaries."
|
|---|
| 86 | (let ((table (%make-string-table separator)))
|
|---|
| 87 | (dolist (x initial-contents)
|
|---|
| 88 | (setf (getstring (car x) table) (cdr x)))
|
|---|
| 89 | table))
|
|---|
| 90 |
|
|---|
| 91 |
|
|---|
| 92 | (defstruct (word-table
|
|---|
| 93 | (:print-function print-word-table))
|
|---|
| 94 | "This structure is a word-table which is part of a Hemlock string-table."
|
|---|
| 95 | (num-words 0 :type fixnum) ; Number of words
|
|---|
| 96 | (words (make-array initial-word-table-size))) ; Array of WORD-ENTRY's
|
|---|
| 97 |
|
|---|
| 98 | (defun print-word-table (table stream depth)
|
|---|
| 99 | (declare (ignore table depth))
|
|---|
| 100 | (format stream "#<Word Table>"))
|
|---|
| 101 |
|
|---|
| 102 |
|
|---|
| 103 | (defstruct (word-entry
|
|---|
| 104 | (:constructor make-word-entry (folded))
|
|---|
| 105 | (:print-function print-word-entry))
|
|---|
| 106 | "This structure is an entry in a word table which is part of a Hemlock
|
|---|
| 107 | string-table."
|
|---|
| 108 | next-table ; Pointer to next WORD-TABLE
|
|---|
| 109 | folded ; Downcased word
|
|---|
| 110 | value-node) ; Pointer to value node or NIL
|
|---|
| 111 |
|
|---|
| 112 | (defun print-word-entry (entry stream depth)
|
|---|
| 113 | (declare (ignore depth))
|
|---|
| 114 | (format stream "#<Word Table Entry: \"~A\">" (word-entry-folded entry)))
|
|---|
| 115 |
|
|---|
| 116 |
|
|---|
| 117 | (defstruct (value-node
|
|---|
| 118 | (:constructor make-value-node (proper folded value))
|
|---|
| 119 | (:print-function print-value-node))
|
|---|
| 120 | "This structure is a node containing a value in a Hemlock string-table."
|
|---|
| 121 | folded ; Downcased copy of string
|
|---|
| 122 | proper ; Proper copy of string entry
|
|---|
| 123 | value) ; Value of entry
|
|---|
| 124 |
|
|---|
| 125 | (defun print-value-node (node stream depth)
|
|---|
| 126 | (declare (ignore depth))
|
|---|
| 127 | (format stream "<Value Node \"~A\">" (value-node-proper node)))
|
|---|
| 128 |
|
|---|
| 129 | |
|---|
| 130 |
|
|---|
| 131 | ;;;; Bi-SvPosition, String-Compare, String-Compare*
|
|---|
| 132 |
|
|---|
| 133 | ;;; Much like the CL function POSITION; however, this is a fast binary
|
|---|
| 134 | ;;; search for simple vectors. Vector must be a simple vector and Test
|
|---|
| 135 | ;;; must be a function which returns either :equal, :less, or :greater.
|
|---|
| 136 | ;;; (The vector must be sorted from lowest index to highest index by the
|
|---|
| 137 | ;;; Test function.) Two values are returned: the first is the position
|
|---|
| 138 | ;;; Item was found or if it was not found, where it should be inserted;
|
|---|
| 139 | ;;; the second is a boolean flag indicating whether or not Item was
|
|---|
| 140 | ;;; found.
|
|---|
| 141 | ;;;
|
|---|
| 142 | (defun bi-svposition (item vector test &key (start 0) end key)
|
|---|
| 143 | (declare (simple-vector vector) (fixnum start))
|
|---|
| 144 | (let ((low start)
|
|---|
| 145 | (high (if end end (length vector)))
|
|---|
| 146 | (mid 0))
|
|---|
| 147 | (declare (fixnum low high mid))
|
|---|
| 148 | (loop
|
|---|
| 149 | (when (< high low) (return (values low nil)))
|
|---|
| 150 | (setf mid (+ (the fixnum (ash (the fixnum (- high low)) -1)) low))
|
|---|
| 151 | (let* ((array-item (svref vector mid))
|
|---|
| 152 | (test-item (if key (funcall key array-item) array-item)))
|
|---|
| 153 | (ecase (funcall test item test-item)
|
|---|
| 154 | (:equal (return (values mid t)))
|
|---|
| 155 | (:less (setf high (1- mid)))
|
|---|
| 156 | (:greater (setf low (1+ mid))))))))
|
|---|
| 157 |
|
|---|
| 158 | ;;; A simple-string comparison appropriate for use with BI-SVPOSITION.
|
|---|
| 159 | ;;;
|
|---|
| 160 | (defun string-compare (s1 s2 &key (start1 0) end1 (start2 0) end2)
|
|---|
| 161 | (declare (simple-string s1 s2) (fixnum start1 start2))
|
|---|
| 162 | (let* ((end1 (or end1 (length s1)))
|
|---|
| 163 | (end2 (or end2 (length s2)))
|
|---|
| 164 | (pos1 (string/= s1 s2
|
|---|
| 165 | :start1 start1 :end1 end1 :start2 start2 :end2 end2)))
|
|---|
| 166 | (if (null pos1)
|
|---|
| 167 | :equal
|
|---|
| 168 | (let ((pos2 (+ (the fixnum pos1) (- start2 start1))))
|
|---|
| 169 | (declare (fixnum pos2))
|
|---|
| 170 | (cond ((= pos1 (the fixnum end1)) :less)
|
|---|
| 171 | ((= pos2 (the fixnum end2)) :greater)
|
|---|
| 172 | ((char< (schar s1 (the fixnum pos1)) (schar s2 pos2)) :less)
|
|---|
| 173 | (t :greater))))))
|
|---|
| 174 |
|
|---|
| 175 | ;;; Macro to return a closure to call STRING-COMPARE with the given
|
|---|
| 176 | ;;; keys.
|
|---|
| 177 | ;;;
|
|---|
| 178 | (defmacro string-compare* (&rest keys)
|
|---|
| 179 | `#'(lambda (x y) (string-compare x y ,@keys)))
|
|---|
| 180 |
|
|---|
| 181 | |
|---|
| 182 |
|
|---|
| 183 | ;;;; Insert-Element, Nconcf
|
|---|
| 184 |
|
|---|
| 185 | ;;; Insert-Element is a macro which encapsulates the hairiness of
|
|---|
| 186 | ;;; inserting an element into a simple vector. Vector should be a
|
|---|
| 187 | ;;; simple vector with Num elements (which may be less than or equal to
|
|---|
| 188 | ;;; the length of the vector) and Element is the element to insert at
|
|---|
| 189 | ;;; Pos. The optional argument Grow-Factor may be specified to control
|
|---|
| 190 | ;;; the new size of the array if a new vector is necessary. The result
|
|---|
| 191 | ;;; of INSERT-ELEMENT must be used as a new vector may be created.
|
|---|
| 192 | ;;; (Note that the arguments should probably be lexicals since some of
|
|---|
| 193 | ;;; them are evaluated more than once.)
|
|---|
| 194 | ;;;
|
|---|
| 195 | ;;; We clear out the old vector so that it won't hold on to garbage if it
|
|---|
| 196 | ;;; happens to be in static space.
|
|---|
| 197 | ;;;
|
|---|
| 198 | (defmacro insert-element (vector pos element num &optional (grow-factor 2))
|
|---|
| 199 | `(let ((new-num (1+ ,num))
|
|---|
| 200 | (max (length ,vector)))
|
|---|
| 201 | (declare (fixnum new-num max))
|
|---|
| 202 | (cond ((= ,num max)
|
|---|
| 203 | ;; grow the vector
|
|---|
| 204 | (let ((new (make-array (truncate (* max ,grow-factor)))))
|
|---|
| 205 | (declare (simple-vector new))
|
|---|
| 206 | ;; Blt the new buggers into place leaving a space for
|
|---|
| 207 | ;; the new element
|
|---|
| 208 | (replace new ,vector :end1 ,pos :end2 ,pos)
|
|---|
| 209 | (replace new ,vector :start1 (1+ ,pos) :end1 new-num
|
|---|
| 210 | :start2 ,pos :end2 ,num)
|
|---|
| 211 | (fill ,vector nil)
|
|---|
| 212 | (setf (svref new ,pos) ,element)
|
|---|
| 213 | new))
|
|---|
| 214 | (t
|
|---|
| 215 | ;; move the buggers down a slot
|
|---|
| 216 | (replace ,vector ,vector :start1 (1+ ,pos) :start2 ,pos)
|
|---|
| 217 | (setf (svref ,vector ,pos) ,element)
|
|---|
| 218 | ,vector))))
|
|---|
| 219 |
|
|---|
| 220 | (define-modify-macro nconcf (&rest args) nconc)
|
|---|
| 221 |
|
|---|
| 222 | |
|---|
| 223 |
|
|---|
| 224 | ;;;; With-Folded-String, Do-Words
|
|---|
| 225 |
|
|---|
| 226 | ;;; With-Folded-String is a macro which deals with strings from the
|
|---|
| 227 | ;;; user. First, if the original string is not a simple string then it
|
|---|
| 228 | ;;; is coerced to one. Next, the string is trimmed using the separator
|
|---|
| 229 | ;;; character and all separators between words are collapsed to a single
|
|---|
| 230 | ;;; separator. The word boundaries are pushed on to a list so that the
|
|---|
| 231 | ;;; Do-Words macro can be called anywhere within the dynamic extent of a
|
|---|
| 232 | ;;; With-Folded-String to ``do'' over the words.
|
|---|
| 233 |
|
|---|
| 234 | (defvar *separator-positions* nil)
|
|---|
| 235 |
|
|---|
| 236 | (defmacro do-words ((start-var end-var) &body body)
|
|---|
| 237 | (let ((sep-pos (gensym)))
|
|---|
| 238 | `(dolist (,sep-pos *separator-positions*)
|
|---|
| 239 | (let ((,start-var (car ,sep-pos))
|
|---|
| 240 | (,end-var (cdr ,sep-pos)))
|
|---|
| [12061] | 241 | (locally
|
|---|
| 242 | ,@body)))))
|
|---|
| 243 |
|
|---|
| 244 | (defmacro with-folded-string ((str-var len-var orig-str separator)
|
|---|
| 245 | &body body)
|
|---|
| 246 | `(let* ((,str-var (make-string (length ,orig-str)))
|
|---|
| 247 | (*separator-positions* nil))
|
|---|
| 248 | (declare (simple-string ,str-var)
|
|---|
| 249 | (dynamic-extent ,str-var))
|
|---|
| 250 | ;; make the string simple if it isn't already
|
|---|
| [6] | 251 | (unless (simple-string-p ,orig-str)
|
|---|
| [12061] | 252 | (setq ,orig-str (coerce ,orig-str 'simple-string)))
|
|---|
| [6] | 253 | ;; munge it into stack-allocated ,str-var and do the body
|
|---|
| 254 | (let ((,len-var (with-folded-munge-string ,str-var ,orig-str ,separator)))
|
|---|
| 255 | ,@body)))
|
|---|
| 256 |
|
|---|
| 257 | (defun with-folded-munge-string (buf str separator)
|
|---|
| 258 | (declare (simple-string str) (base-char separator))
|
|---|
| 259 | (let ((str-len (length str))
|
|---|
| 260 | (sep-pos nil)
|
|---|
| 261 | (buf-pos 0))
|
|---|
| 262 | ;; Bash the spaces out of the string remembering where the words are.
|
|---|
| 263 | (let ((start-pos (position separator str :test-not #'char=)))
|
|---|
| 264 | (when start-pos
|
|---|
| 265 | (loop
|
|---|
| 266 | (let* ((end-pos (position separator str
|
|---|
| 267 | :start start-pos :test #'char=))
|
|---|
| [12061] | 268 | (next-start-pos (and end-pos (position separator str
|
|---|
| [6] | 269 | :start end-pos
|
|---|
| 270 | :test-not #'char=)))
|
|---|
| 271 | (word-len (- (or end-pos str-len) start-pos))
|
|---|
| 272 | (new-buf-pos (+ buf-pos word-len)))
|
|---|
| 273 | (replace buf str
|
|---|
| 274 | :start1 buf-pos :start2 start-pos :end2 end-pos)
|
|---|
| [12061] | 275 | (push (cons buf-pos new-buf-pos) sep-pos)
|
|---|
| [6] | 276 | (setf buf-pos new-buf-pos)
|
|---|
| [12061] | 277 | (when (or (null end-pos) (null next-start-pos))
|
|---|
| [6] | 278 | (return))
|
|---|
| 279 | (setf start-pos next-start-pos)
|
|---|
| 280 | (setf (schar buf buf-pos) separator)
|
|---|
| 281 | (incf buf-pos)))))
|
|---|
| 282 | (nstring-downcase buf :end buf-pos)
|
|---|
| 283 | (setf *separator-positions* (nreverse sep-pos))
|
|---|
| 284 | buf-pos))
|
|---|
| 285 |
|
|---|
| 286 | |
|---|
| 287 |
|
|---|
| 288 | ;;;; Getstring, Setf Method for Getstring
|
|---|
| 289 |
|
|---|
| 290 | (defun getstring (string string-table)
|
|---|
| 291 | "Looks up String in String-Table. Returns two values: the first is
|
|---|
| 292 | the value of String or NIL if it does not exist; the second is a
|
|---|
| 293 | boolean flag indicating whether or not String was found in
|
|---|
| 294 | String-Table."
|
|---|
| 295 | (with-folded-string (folded len string (string-table-separator string-table))
|
|---|
| 296 | (let ((nodes (string-table-value-nodes string-table))
|
|---|
| 297 | (num-nodes (string-table-num-nodes string-table)))
|
|---|
| 298 | (declare (simple-vector nodes) (fixnum num-nodes))
|
|---|
| 299 | (multiple-value-bind
|
|---|
| 300 | (pos found-p)
|
|---|
| 301 | (bi-svposition folded nodes (string-compare* :end1 len)
|
|---|
| 302 | :end (1- num-nodes) :key #'value-node-folded)
|
|---|
| 303 | (if found-p
|
|---|
| 304 | (values (value-node-value (svref nodes pos)) t)
|
|---|
| 305 | (values nil nil))))))
|
|---|
| 306 |
|
|---|
| 307 | (defun %set-string-table (string table value)
|
|---|
| 308 | "Sets the value of String in Table to Value. If necessary, creates
|
|---|
| 309 | a new entry in the string table."
|
|---|
| 310 | (with-folded-string (folded len string (string-table-separator table))
|
|---|
| 311 | (when (zerop len)
|
|---|
| 312 | (error "An empty string cannot be inserted into a string-table."))
|
|---|
| 313 | (let ((nodes (string-table-value-nodes table))
|
|---|
| 314 | (num-nodes (string-table-num-nodes table)))
|
|---|
| 315 | (declare (simple-string folded) (simple-vector nodes) (fixnum num-nodes))
|
|---|
| 316 | (multiple-value-bind
|
|---|
| 317 | (pos found-p)
|
|---|
| 318 | (bi-svposition folded nodes (string-compare* :end1 len)
|
|---|
| 319 | :end (1- num-nodes) :key #'value-node-folded)
|
|---|
| 320 | (cond (found-p
|
|---|
| 321 | (setf (value-node-value (svref nodes pos)) value))
|
|---|
| 322 | (t
|
|---|
| 323 | ;; Note that a separator collapsed copy of string is NOT
|
|---|
| 324 | ;; used here ...
|
|---|
| 325 | ;;
|
|---|
| 326 | (let ((node (make-value-node string (subseq folded 0 len) value))
|
|---|
| 327 | (word-table (string-table-first-word-table table)))
|
|---|
| 328 | ;; put in the value nodes array
|
|---|
| 329 | (setf (string-table-value-nodes table)
|
|---|
| 330 | (insert-element nodes pos node num-nodes))
|
|---|
| 331 | (incf (string-table-num-nodes table))
|
|---|
| 332 | ;; insert it into the word tree
|
|---|
| 333 | (%set-insert-words folded word-table node))))))
|
|---|
| 334 | value))
|
|---|
| 335 |
|
|---|
| 336 | (defun %set-insert-words (folded first-word-table value-node)
|
|---|
| 337 | (declare (simple-string folded))
|
|---|
| 338 | (let ((word-table first-word-table)
|
|---|
| 339 | (entry nil))
|
|---|
| 340 | (do-words (word-start word-end)
|
|---|
| 341 | (let ((word-array (word-table-words word-table))
|
|---|
| 342 | (num-words (word-table-num-words word-table)))
|
|---|
| 343 | (declare (simple-vector word-array) (fixnum num-words))
|
|---|
| 344 | ;; find the entry or create a new one and insert it
|
|---|
| 345 | (multiple-value-bind
|
|---|
| 346 | (pos found-p)
|
|---|
| 347 | (bi-svposition folded word-array
|
|---|
| 348 | (string-compare* :start1 word-start :end1 word-end)
|
|---|
| 349 | :end (1- num-words) :key #'word-entry-folded)
|
|---|
| 350 | (declare (fixnum pos))
|
|---|
| 351 | (cond (found-p
|
|---|
| 352 | (setf entry (svref word-array pos)))
|
|---|
| 353 | (t
|
|---|
| 354 | (setf entry (make-word-entry
|
|---|
| 355 | (subseq folded word-start word-end)))
|
|---|
| 356 | (setf (word-table-words word-table)
|
|---|
| 357 | (insert-element word-array pos entry num-words))
|
|---|
| 358 | (incf (word-table-num-words word-table)))))
|
|---|
| 359 | (let ((next-table (word-entry-next-table entry)))
|
|---|
| [16082] | 360 | (unless next-table
|
|---|
| 361 | (setf next-table (make-word-table))
|
|---|
| 362 | (setf (word-entry-next-table entry) next-table))
|
|---|
| 363 | (setf word-table next-table))))
|
|---|
| 364 | (setf (word-entry-value-node entry) value-node)))
|
|---|
| [6] | 365 |
|
|---|
| 366 | (defun string-table-values (string-table)
|
|---|
| 367 | (loop with nodes = (string-table-value-nodes string-table)
|
|---|
| 368 | for i from 0 below (string-table-num-nodes string-table)
|
|---|
| 369 | collect (value-node-value (svref nodes i))))
|
|---|
| 370 |
|
|---|
| 371 | ;;;; Find-Bound-Entries
|
|---|
| 372 |
|
|---|
| 373 | (defun find-bound-entries (word-entries)
|
|---|
| 374 | (let ((res nil))
|
|---|
| 375 | (dolist (entry word-entries)
|
|---|
| 376 | (nconcf res (sub-find-bound-entries entry)))
|
|---|
| 377 | res))
|
|---|
| 378 |
|
|---|
| 379 | (defun sub-find-bound-entries (entry)
|
|---|
| 380 | (let ((bound-entries nil))
|
|---|
| 381 | (when (word-entry-value-node entry) (push entry bound-entries))
|
|---|
| 382 | (let ((next-table (word-entry-next-table entry)))
|
|---|
| 383 | (when next-table
|
|---|
| 384 | (let ((word-array (word-table-words next-table))
|
|---|
| 385 | (num-words (word-table-num-words next-table)))
|
|---|
| 386 | (declare (simple-vector word-array) (fixnum num-words))
|
|---|
| 387 | (dotimes (i num-words)
|
|---|
| 388 | (declare (fixnum i))
|
|---|
| 389 | (nconcf bound-entries
|
|---|
| 390 | (sub-find-bound-entries (svref word-array i)))))))
|
|---|
| 391 | bound-entries))
|
|---|
| 392 |
|
|---|
| 393 | |
|---|
| 394 |
|
|---|
| 395 | ;;;; Find-Ambiguous
|
|---|
| 396 |
|
|---|
| 397 | (defun find-ambiguous (string string-table)
|
|---|
| 398 | "Returns a list, in alphabetical order, of all the strings in String-Table
|
|---|
| 399 | which String matches."
|
|---|
| 400 | (with-folded-string (folded len string (string-table-separator string-table))
|
|---|
| 401 | (find-ambiguous* folded len string-table)))
|
|---|
| 402 |
|
|---|
| 403 | (defun find-ambiguous* (folded len table)
|
|---|
| 404 | (let ((word-table (string-table-first-word-table table))
|
|---|
| 405 | (word-entries nil))
|
|---|
| 406 | (cond ((zerop len)
|
|---|
| 407 | (setf word-entries (find-ambiguous-entries "" 0 0 word-table)))
|
|---|
| 408 | (t
|
|---|
| 409 | (let ((word-tables (list word-table)))
|
|---|
| 410 | (do-words (start end)
|
|---|
| 411 | (setf word-entries nil)
|
|---|
| 412 | (dolist (wt word-tables)
|
|---|
| 413 | (nconcf word-entries
|
|---|
| 414 | (find-ambiguous-entries folded start end wt)))
|
|---|
| 415 | (unless word-entries (return))
|
|---|
| 416 | (let ((next-word-tables nil))
|
|---|
| 417 | (dolist (entry word-entries)
|
|---|
| 418 | (let ((next-word-table (word-entry-next-table entry)))
|
|---|
| 419 | (when next-word-table
|
|---|
| 420 | (push next-word-table next-word-tables))))
|
|---|
| 421 | (unless next-word-tables (return))
|
|---|
| 422 | (setf word-tables (nreverse next-word-tables)))))))
|
|---|
| 423 | (let ((bound-entries (find-bound-entries word-entries))
|
|---|
| 424 | (res nil))
|
|---|
| 425 | (dolist (be bound-entries)
|
|---|
| 426 | (push (value-node-proper (word-entry-value-node be)) res))
|
|---|
| 427 | (nreverse res))))
|
|---|
| 428 |
|
|---|
| 429 | (defun find-ambiguous-entries (folded start end word-table)
|
|---|
| 430 | (let ((word-array (word-table-words word-table))
|
|---|
| 431 | (num-words (word-table-num-words word-table))
|
|---|
| 432 | (res nil))
|
|---|
| 433 | (declare (simple-vector word-array) (fixnum num-words))
|
|---|
| 434 | (unless (zerop num-words)
|
|---|
| 435 | (multiple-value-bind
|
|---|
| 436 | (pos found-p)
|
|---|
| 437 | (bi-svposition folded word-array
|
|---|
| 438 | (string-compare* :start1 start :end1 end)
|
|---|
| 439 | :end (1- num-words) :key #'word-entry-folded)
|
|---|
| 440 | (declare (ignore found-p))
|
|---|
| 441 | ;;
|
|---|
| 442 | ;; Find last ambiguous string, checking for the end of the table.
|
|---|
| 443 | (do ((i pos (1+ i)))
|
|---|
| 444 | ((= i num-words))
|
|---|
| 445 | (declare (fixnum i))
|
|---|
| 446 | (let* ((entry (svref word-array i))
|
|---|
| 447 | (str (word-entry-folded entry))
|
|---|
| 448 | (str-len (length str))
|
|---|
| 449 | (index (string/= folded str :start1 start :end1 end
|
|---|
| 450 | :end2 str-len)))
|
|---|
| 451 | (declare (simple-string str) (fixnum str-len))
|
|---|
| 452 | (when (and index (/= index end)) (return nil))
|
|---|
| 453 | (push entry res)))
|
|---|
| 454 | (setf res (nreverse res))
|
|---|
| 455 | ;;
|
|---|
| 456 | ;; Scan back to the first string, checking for the beginning.
|
|---|
| 457 | (do ((i (1- pos) (1- i)))
|
|---|
| 458 | ((minusp i))
|
|---|
| 459 | (declare (fixnum i))
|
|---|
| 460 | (let* ((entry (svref word-array i))
|
|---|
| 461 | (str (word-entry-folded entry))
|
|---|
| 462 | (str-len (length str))
|
|---|
| 463 | (index (string/= folded str :start1 start :end1 end
|
|---|
| 464 | :end2 str-len)))
|
|---|
| 465 | (declare (simple-string str) (fixnum str-len))
|
|---|
| 466 | (when (and index (/= index end)) (return nil))
|
|---|
| 467 | (push entry res)))))
|
|---|
| 468 | res))
|
|---|
| 469 |
|
|---|
| 470 | |
|---|
| 471 |
|
|---|
| 472 | ;;;; Find-Containing
|
|---|
| 473 |
|
|---|
| 474 | (defun find-containing (string string-table)
|
|---|
| 475 | "Return a list in alphabetical order of all the strings in Table which
|
|---|
| 476 | contain String as a substring."
|
|---|
| 477 | (with-folded-string (folded len string (string-table-separator string-table))
|
|---|
| 478 | (declare (ignore len))
|
|---|
| 479 | (let ((word-table (string-table-first-word-table string-table))
|
|---|
| 480 | (words nil))
|
|---|
| 481 | ;; cons up a list of the words
|
|---|
| 482 | (do-words (start end)
|
|---|
| 483 | (push (subseq folded start end) words))
|
|---|
| 484 | (setf words (nreverse words))
|
|---|
| 485 | (let ((entries (sub-find-containing words word-table))
|
|---|
| 486 | (res nil))
|
|---|
| 487 | (dolist (e entries)
|
|---|
| 488 | (push (value-node-proper (word-entry-value-node e)) res))
|
|---|
| 489 | (nreverse res)))))
|
|---|
| 490 |
|
|---|
| 491 | (defun sub-find-containing (words word-table)
|
|---|
| 492 | (let ((res nil)
|
|---|
| 493 | (word-array (word-table-words word-table))
|
|---|
| 494 | (num-words (word-table-num-words word-table)))
|
|---|
| 495 | (declare (simple-vector word-array) (fixnum num-words))
|
|---|
| 496 | (dotimes (i num-words)
|
|---|
| 497 | (declare (fixnum i))
|
|---|
| 498 | (let* ((entry (svref word-array i))
|
|---|
| 499 | (word (word-entry-folded entry))
|
|---|
| 500 | (found (find word words
|
|---|
| 501 | :test #'(lambda (y x)
|
|---|
| 502 | (let ((lx (length x))
|
|---|
| 503 | (ly (length y)))
|
|---|
| 504 | (and (<= lx ly)
|
|---|
| 505 | (string= x y :end2 lx))))))
|
|---|
| 506 | (rest-words (if found
|
|---|
| 507 | (remove found words :test #'eq :count 1)
|
|---|
| 508 | words)))
|
|---|
| 509 | (declare (simple-string word))
|
|---|
| 510 | (cond (rest-words
|
|---|
| 511 | (let ((next-table (word-entry-next-table entry)))
|
|---|
| 512 | (when next-table
|
|---|
| 513 | (nconcf res (sub-find-containing rest-words next-table)))))
|
|---|
| 514 | (t
|
|---|
| 515 | (nconcf res (sub-find-bound-entries entry))))))
|
|---|
| 516 | res))
|
|---|
| 517 |
|
|---|
| 518 | |
|---|
| 519 |
|
|---|
| 520 | ;;;; Complete-String
|
|---|
| 521 |
|
|---|
| 522 | (defvar *complete-string-buffer-size* 128)
|
|---|
| 523 | (defvar *complete-string-buffer* (make-string *complete-string-buffer-size*))
|
|---|
| 524 | (declaim (simple-string *complete-string-buffer*))
|
|---|
| 525 |
|
|---|
| 526 | (defun complete-string (string tables)
|
|---|
| 527 | "Attempts to complete the string String against the string tables in the
|
|---|
| 528 | list Tables. Tables must all use the same separator character. See the
|
|---|
| 529 | manual for details on return values."
|
|---|
| 530 | (let ((separator (string-table-separator (car tables))))
|
|---|
| 531 | #|(when (member separator (cdr tables)
|
|---|
| 532 | :key #'string-table-separator :test-not #'char=)
|
|---|
| 533 | (error "All tables must have the same separator."))|#
|
|---|
| 534 | (with-folded-string (folded len string separator)
|
|---|
| 535 | (let ((strings nil))
|
|---|
| 536 | (dolist (table tables)
|
|---|
| 537 | (nconcf strings (find-ambiguous* folded len table)))
|
|---|
| 538 | ;; pick off easy case
|
|---|
| 539 | (when (null strings)
|
|---|
| 540 | (return-from complete-string (values nil :none nil nil nil)))
|
|---|
| 541 | ;; grow complete-string buffer if necessary
|
|---|
| 542 | (let ((size-needed (1+ len)))
|
|---|
| 543 | (when (> size-needed *complete-string-buffer-size*)
|
|---|
| 544 | (let* ((new-size (* size-needed 2))
|
|---|
| 545 | (new-buffer (make-string new-size)))
|
|---|
| 546 | (setf *complete-string-buffer* new-buffer)
|
|---|
| 547 | (setf *complete-string-buffer-size* new-size))))
|
|---|
| 548 | (multiple-value-bind
|
|---|
| 549 | (str ambig-pos unique-p)
|
|---|
| 550 | (find-longest-completion strings separator)
|
|---|
| 551 | (multiple-value-bind (value found-p) (find-values str tables)
|
|---|
| 552 | (let ((field-pos (compute-field-pos string str separator)))
|
|---|
| 553 | (cond ((not found-p)
|
|---|
| 554 | (values str :ambiguous nil field-pos ambig-pos))
|
|---|
| 555 | (unique-p
|
|---|
| 556 | (values str :unique value field-pos nil))
|
|---|
| 557 | (t
|
|---|
| 558 | (values str :complete value field-pos ambig-pos))))))))))
|
|---|
| 559 |
|
|---|
| 560 | (defun find-values (string tables)
|
|---|
| 561 | (dolist (table tables)
|
|---|
| 562 | (multiple-value-bind (value found-p) (getstring string table)
|
|---|
| 563 | (when found-p
|
|---|
| 564 | (return-from find-values (values value t)))))
|
|---|
| 565 | (values nil nil))
|
|---|
| 566 |
|
|---|
| 567 | (defun compute-field-pos (given best separator)
|
|---|
| 568 | (declare (simple-string given best) (base-char separator))
|
|---|
| 569 | (let ((give-pos 0)
|
|---|
| 570 | (best-pos 0))
|
|---|
| 571 | (loop
|
|---|
| 572 | (setf give-pos (position separator given :start give-pos :test #'char=))
|
|---|
| 573 | (setf best-pos (position separator best :start best-pos :test #'char=))
|
|---|
| 574 | (unless (and give-pos best-pos) (return best-pos))
|
|---|
| 575 | (incf (the fixnum give-pos))
|
|---|
| 576 | (incf (the fixnum best-pos)))))
|
|---|
| 577 |
|
|---|
| 578 | |
|---|
| 579 |
|
|---|
| 580 | ;;;; Find-Longest-Completion
|
|---|
| 581 |
|
|---|
| 582 | (defun find-longest-completion (strings separator)
|
|---|
| 583 | (declare (base-char separator))
|
|---|
| 584 | (let ((first (car strings))
|
|---|
| 585 | (rest-strings (cdr strings))
|
|---|
| 586 | (punt-p nil)
|
|---|
| 587 | (buf-pos 0)
|
|---|
| 588 | (first-start 0)
|
|---|
| 589 | (first-end -1)
|
|---|
| 590 | (ambig-pos nil)
|
|---|
| 591 | (maybe-unique-p nil))
|
|---|
| 592 | (declare (simple-string first) (fixnum buf-pos first-start))
|
|---|
| 593 | ;;
|
|---|
| 594 | ;; Make room to store each string's next separator index.
|
|---|
| 595 | (do ((l rest-strings (cdr l)))
|
|---|
| 596 | ((endp l))
|
|---|
| 597 | (setf (car l) (cons (car l) -1)))
|
|---|
| 598 | ;;
|
|---|
| 599 | ;; Compare the rest of the strings to the first one.
|
|---|
| 600 | ;; It's our de facto standard for how far we can go.
|
|---|
| 601 | (loop
|
|---|
| 602 | (setf first-start (1+ first-end))
|
|---|
| 603 | (setf first-end
|
|---|
| 604 | (position separator first :start first-start :test #'char=))
|
|---|
| 605 | (unless first-end
|
|---|
| 606 | (setf first-end (length first))
|
|---|
| 607 | (setf punt-p t)
|
|---|
| 608 | (setf maybe-unique-p t))
|
|---|
| 609 | (let ((first-max first-end)
|
|---|
| 610 | (word-ambiguous-p nil))
|
|---|
| 611 | (declare (fixnum first-max))
|
|---|
| 612 | ;;
|
|---|
| 613 | ;; For each string, store the separator's next index.
|
|---|
| 614 | ;; If there's no separator, store nil and prepare to punt.
|
|---|
| 615 | ;; If the string's field is not equal to the first's, shorten the max
|
|---|
| 616 | ;; expectation for this field, and declare ambiguity.
|
|---|
| 617 | (dolist (s rest-strings)
|
|---|
| 618 | (let* ((str (car s))
|
|---|
| 619 | (str-last-pos (cdr s))
|
|---|
| 620 | (str-start (1+ str-last-pos))
|
|---|
| 621 | (str-end (position separator str
|
|---|
| 622 | :start str-start :test #'char=))
|
|---|
| 623 | (index (string-not-equal first str
|
|---|
| 624 | :start1 first-start :end1 first-max
|
|---|
| 625 | :start2 str-start :end2 str-end)))
|
|---|
| 626 | (declare (simple-string str) (fixnum str-last-pos str-start))
|
|---|
| 627 | (setf (cdr s) str-end)
|
|---|
| 628 | (unless str-end
|
|---|
| 629 | (setf punt-p t)
|
|---|
| 630 | (setf str-end (length str)))
|
|---|
| 631 | (when index
|
|---|
| 632 | (setf word-ambiguous-p t) ; not equal for some reason
|
|---|
| 633 | (when (< index first-max)
|
|---|
| 634 | (setf first-max index)))))
|
|---|
| 635 | ;;
|
|---|
| 636 | ;; Store what we matched into the result buffer and save the
|
|---|
| 637 | ;; ambiguous position if its the first ambiguous field.
|
|---|
| 638 | (let ((length (- first-max first-start)))
|
|---|
| 639 | (declare (fixnum length))
|
|---|
| 640 | (unless (zerop length)
|
|---|
| 641 | (unless (zerop buf-pos)
|
|---|
| 642 | (setf (schar *complete-string-buffer* buf-pos) separator)
|
|---|
| 643 | (incf buf-pos))
|
|---|
| 644 | (replace *complete-string-buffer* first
|
|---|
| 645 | :start1 buf-pos :start2 first-start :end2 first-max)
|
|---|
| 646 | (incf buf-pos length))
|
|---|
| 647 | (when (and (null ambig-pos) word-ambiguous-p)
|
|---|
| 648 | (setf ambig-pos buf-pos))
|
|---|
| 649 | (when (or punt-p (zerop length)) (return)))))
|
|---|
| 650 | (values
|
|---|
| 651 | (subseq *complete-string-buffer* 0 buf-pos)
|
|---|
| 652 | ;; If every corresponding field in each possible completion was equal,
|
|---|
| 653 | ;; our result string is an initial substring of some other completion,
|
|---|
| 654 | ;; so we're ambiguous at the end.
|
|---|
| 655 | (or ambig-pos buf-pos)
|
|---|
| 656 | (and (null ambig-pos)
|
|---|
| 657 | maybe-unique-p
|
|---|
| 658 | (every #'(lambda (x) (null (cdr x))) rest-strings)))))
|
|---|
| 659 |
|
|---|
| 660 | |
|---|
| 661 |
|
|---|
| 662 | ;;;; Clrstring
|
|---|
| 663 |
|
|---|
| 664 | (defun clrstring (string-table)
|
|---|
| 665 | "Delete all the entries in String-Table."
|
|---|
| 666 | (fill (the simple-vector (string-table-value-nodes string-table)) nil)
|
|---|
| 667 | (setf (string-table-num-nodes string-table) 0)
|
|---|
| 668 | (let ((word-table (string-table-first-word-table string-table)))
|
|---|
| 669 | (fill (the simple-vector (word-table-words word-table)) nil)
|
|---|
| 670 | (setf (word-table-num-words word-table) 0))
|
|---|
| 671 | t)
|
|---|
| 672 |
|
|---|
| 673 | |
|---|
| 674 |
|
|---|
| 675 | ;;;; Delete-String
|
|---|
| 676 |
|
|---|
| 677 | (defun delete-string (string string-table)
|
|---|
| 678 | (with-folded-string (folded len string (string-table-separator string-table))
|
|---|
| 679 | (when (plusp len)
|
|---|
| 680 | (let* ((nodes (string-table-value-nodes string-table))
|
|---|
| 681 | (num-nodes (string-table-num-nodes string-table))
|
|---|
| 682 | (end (1- num-nodes)))
|
|---|
| 683 | (declare (simple-string folded) (simple-vector nodes)
|
|---|
| 684 | (fixnum num-nodes end))
|
|---|
| 685 | (multiple-value-bind
|
|---|
| 686 | (pos found-p)
|
|---|
| 687 | (bi-svposition folded nodes (string-compare* :end1 len)
|
|---|
| 688 | :end end :key #'value-node-folded)
|
|---|
| 689 | (cond (found-p
|
|---|
| 690 | (replace nodes nodes
|
|---|
| 691 | :start1 pos :end1 end :start2 (1+ pos) :end2 num-nodes)
|
|---|
| 692 | (setf (svref nodes end) nil)
|
|---|
| 693 | (setf (string-table-num-nodes string-table) end)
|
|---|
| 694 | (sub-delete-string folded string-table)
|
|---|
| 695 | t)
|
|---|
| 696 | (t nil)))))))
|
|---|
| 697 |
|
|---|
| 698 | (defun sub-delete-string (folded string-table)
|
|---|
| 699 | (let ((next-table (string-table-first-word-table string-table))
|
|---|
| 700 | (word-table nil)
|
|---|
| 701 | (node nil)
|
|---|
| 702 | (entry nil)
|
|---|
| 703 | (level -1)
|
|---|
| 704 | last-table last-table-level last-table-pos
|
|---|
| 705 | last-entry last-entry-level)
|
|---|
| 706 | (declare (fixnum level))
|
|---|
| 707 | (do-words (start end)
|
|---|
| 708 | (when node
|
|---|
| 709 | (setf last-entry entry)
|
|---|
| 710 | (setf last-entry-level level))
|
|---|
| 711 | (setf word-table next-table)
|
|---|
| 712 | (incf level)
|
|---|
| 713 | (let ((word-array (word-table-words word-table))
|
|---|
| 714 | (num-words (word-table-num-words word-table)))
|
|---|
| 715 | (declare (simple-vector word-array) (fixnum num-words))
|
|---|
| 716 | (multiple-value-bind
|
|---|
| 717 | (pos found-p)
|
|---|
| 718 | (bi-svposition folded word-array
|
|---|
| 719 | (string-compare* :start1 start :end1 end)
|
|---|
| 720 | :end (1- num-words) :key #'word-entry-folded)
|
|---|
| 721 | (declare (fixnum pos) (ignore found-p))
|
|---|
| 722 | (setf entry (svref word-array pos))
|
|---|
| 723 | (setf next-table (word-entry-next-table entry))
|
|---|
| 724 | (setf node (word-entry-value-node entry))
|
|---|
| 725 | (when (or (null last-table) (> num-words 1))
|
|---|
| 726 | (setf last-table word-table)
|
|---|
| 727 | (setf last-table-pos pos)
|
|---|
| 728 | (setf last-table-level level)))))
|
|---|
| 729 | (cond (next-table
|
|---|
| 730 | (setf (word-entry-value-node entry) nil))
|
|---|
| 731 | ((and last-entry-level
|
|---|
| 732 | (>= last-entry-level last-table-level))
|
|---|
| 733 | (setf (word-entry-next-table last-entry) nil))
|
|---|
| 734 | (t
|
|---|
| 735 | (let* ((del-word-array (word-table-words last-table))
|
|---|
| 736 | (del-num-words (word-table-num-words last-table))
|
|---|
| 737 | (del-end (1- del-num-words)))
|
|---|
| 738 | (declare (simple-vector del-word-array)
|
|---|
| 739 | (fixnum del-num-words del-end))
|
|---|
| 740 | (replace del-word-array del-word-array
|
|---|
| 741 | :start1 last-table-pos :end1 del-end
|
|---|
| 742 | :start2 (1+ last-table-pos)
|
|---|
| 743 | :end2 del-num-words)
|
|---|
| 744 | (setf (svref del-word-array del-end) nil)
|
|---|
| 745 | (setf (word-table-num-words last-table) del-end))))))
|
|---|