| 1 | ;;; -*- Log: hemlock.log; Package: Spell -*-
|
|---|
| 2 | ;;;
|
|---|
| 3 | ;;; **********************************************************************
|
|---|
| 4 | ;;; This code was written as part of the CMU Common Lisp project at
|
|---|
| 5 | ;;; Carnegie Mellon University, and has been placed in the public domain.
|
|---|
| 6 | ;;;
|
|---|
| 7 | #+CMU (ext:file-comment
|
|---|
| 8 | "$Header$")
|
|---|
| 9 | ;;;
|
|---|
| 10 | ;;; **********************************************************************
|
|---|
| 11 | ;;;
|
|---|
| 12 | ;;; Written by Bill Chiles
|
|---|
| 13 | ;;; Designed by Bill Chiles and Rob Maclachlan
|
|---|
| 14 | ;;;
|
|---|
| 15 | ;;; This file contains the code to grow the spelling dictionary in system
|
|---|
| 16 | ;;; space by reading a text file of entries or adding one at a time. This
|
|---|
| 17 | ;;; code relies on implementation dependent code found in Spell-RT.Lisp.
|
|---|
| 18 |
|
|---|
| 19 |
|
|---|
| 20 | (in-package "SPELL")
|
|---|
| 21 |
|
|---|
| 22 | |
|---|
| 23 |
|
|---|
| 24 | ;;;; Converting Flags to Masks
|
|---|
| 25 |
|
|---|
| 26 | (defconstant flag-names-to-masks
|
|---|
| 27 | `((#\V . ,V-mask) (#\N . ,N-mask) (#\X . ,X-mask)
|
|---|
| 28 | (#\H . ,H-mask) (#\Y . ,Y-mask) (#\G . ,G-mask)
|
|---|
| 29 | (#\J . ,J-mask) (#\D . ,D-mask) (#\T . ,T-mask)
|
|---|
| 30 | (#\R . ,R-mask) (#\Z . ,Z-mask) (#\S . ,S-mask)
|
|---|
| 31 | (#\P . ,P-mask) (#\M . ,M-mask)))
|
|---|
| 32 |
|
|---|
| 33 | (defvar *flag-masks*
|
|---|
| 34 | (make-array 128 :element-type '(unsigned-byte 16) :initial-element 0)
|
|---|
| 35 | "This holds the masks for character flags, which is used when reading
|
|---|
| 36 | a text file of dictionary words. Illegal character flags hold zero.")
|
|---|
| 37 |
|
|---|
| 38 | (eval-when (:compile-toplevel :execute)
|
|---|
| 39 | (defmacro flag-mask (char)
|
|---|
| 40 | `(aref *flag-masks* (char-code ,char)))
|
|---|
| 41 | ) ;eval-when
|
|---|
| 42 |
|
|---|
| 43 | (dolist (e flag-names-to-masks)
|
|---|
| 44 | (let ((char (car e))
|
|---|
| 45 | (mask (cdr e)))
|
|---|
| 46 | (setf (flag-mask char) mask)
|
|---|
| 47 | (setf (flag-mask (char-downcase char)) mask)))
|
|---|
| 48 |
|
|---|
| 49 |
|
|---|
| 50 | |
|---|
| 51 |
|
|---|
| 52 | ;;;; String and Hashing Macros
|
|---|
| 53 |
|
|---|
| 54 | (eval-when (:compile-toplevel :execute)
|
|---|
| 55 |
|
|---|
| 56 | (defmacro string-table-replace (src-string dst-start length)
|
|---|
| 57 | `(sap-replace *string-table* ,src-string 0 ,dst-start (+ ,dst-start ,length)))
|
|---|
| 58 |
|
|---|
| 59 | ;;; HASH-ENTRY is used in SPELL-ADD-ENTRY to find a dictionary location for
|
|---|
| 60 | ;;; adding a new entry. If a location contains a zero, then it has never been
|
|---|
| 61 | ;;; used, and no entries have ever been "hashed past" it. If a location
|
|---|
| 62 | ;;; contains SPELL-DELETED-ENTRY, then it once contained an entry that has
|
|---|
| 63 | ;;; since been deleted.
|
|---|
| 64 | ;;;
|
|---|
| 65 | (defmacro hash-entry (entry entry-len)
|
|---|
| 66 | (let ((loop-loc (gensym)) (loc-contents (gensym))
|
|---|
| 67 | (hash (gensym)) (loc (gensym)))
|
|---|
| 68 | `(let* ((,hash (string-hash ,entry ,entry-len))
|
|---|
| 69 | (,loc (rem ,hash (the fixnum *dictionary-size*)))
|
|---|
| 70 | (,loc-contents (dictionary-ref ,loc)))
|
|---|
| 71 | (declare (fixnum ,loc ,loc-contents))
|
|---|
| 72 | (if (or (zerop ,loc-contents) (= ,loc-contents spell-deleted-entry))
|
|---|
| 73 | ,loc
|
|---|
| 74 | (hash2-loop (,loop-loc ,loc-contents) ,loc ,hash
|
|---|
| 75 | ,loop-loc nil t)))))
|
|---|
| 76 |
|
|---|
| 77 | ) ;eval-when
|
|---|
| 78 |
|
|---|
| 79 |
|
|---|
| 80 | |
|---|
| 81 |
|
|---|
| 82 | ;;;; Top Level Stuff
|
|---|
| 83 |
|
|---|
| 84 | (defun spell-read-dictionary (filename)
|
|---|
| 85 | "Add entries to dictionary from lines in the file filename."
|
|---|
| 86 | (with-open-file (s filename :direction :input)
|
|---|
| 87 | (loop (multiple-value-bind (entry eofp) (read-line s nil nil)
|
|---|
| 88 | (declare (type (or simple-string null) entry))
|
|---|
| 89 | (unless entry (return))
|
|---|
| 90 | (spell-add-entry entry)
|
|---|
| 91 | (if eofp (return))))))
|
|---|
| 92 |
|
|---|
| 93 |
|
|---|
| 94 | ;;; This is used to break up an 18 bit string table index into two parts
|
|---|
| 95 | ;;; for storage in a word descriptor unit. See the documentation at the
|
|---|
| 96 | ;;; top of Spell-Correct.Lisp.
|
|---|
| 97 | ;;;
|
|---|
| 98 | (defconstant whole-index-low-byte (byte 16 0))
|
|---|
| 99 |
|
|---|
| 100 | (defun spell-add-entry (line &optional
|
|---|
| 101 | (word-end (or (position #\/ line :test #'char=)
|
|---|
| 102 | (length line))))
|
|---|
| 103 | "Line is of the form \"entry/flag1/flag2\" or \"entry\". It is parsed and
|
|---|
| 104 | added to the spelling dictionary. Line is desstructively modified."
|
|---|
| 105 | (declare (simple-string line) (fixnum word-end))
|
|---|
| 106 | (nstring-upcase line :end word-end)
|
|---|
| 107 | (when (> word-end max-entry-length)
|
|---|
| 108 | (return-from spell-add-entry nil))
|
|---|
| 109 | (let ((entry (lookup-entry line word-end)))
|
|---|
| 110 | (when entry
|
|---|
| 111 | (add-flags (+ entry 2) line word-end)
|
|---|
| 112 | (return-from spell-add-entry nil)))
|
|---|
| 113 | (let* ((hash-loc (hash-entry line word-end))
|
|---|
| 114 | (string-ptr *string-table-size*)
|
|---|
| 115 | (desc-ptr *descriptors-size*)
|
|---|
| 116 | (desc-ptr+1 (1+ desc-ptr))
|
|---|
| 117 | (desc-ptr+2 (1+ desc-ptr+1)))
|
|---|
| 118 | (declare (fixnum string-ptr))
|
|---|
| 119 | (when (not hash-loc) (error "Dictionary Overflow!"))
|
|---|
| 120 | (when (> 3 *free-descriptor-elements*) (grow-descriptors))
|
|---|
| 121 | (when (> word-end *free-string-table-bytes*) (grow-string-table))
|
|---|
| 122 | (decf *free-descriptor-elements* 3)
|
|---|
| 123 | (incf *descriptors-size* 3)
|
|---|
| 124 | (decf *free-string-table-bytes* word-end)
|
|---|
| 125 | (incf *string-table-size* word-end)
|
|---|
| 126 | (setf (dictionary-ref hash-loc) desc-ptr)
|
|---|
| 127 | (setf (descriptor-ref desc-ptr)
|
|---|
| 128 | (dpb (the fixnum (ldb new-hash-byte (string-hash line word-end)))
|
|---|
| 129 | stored-hash-byte
|
|---|
| 130 | word-end))
|
|---|
| 131 | (setf (descriptor-ref desc-ptr+1)
|
|---|
| 132 | (ldb whole-index-low-byte string-ptr))
|
|---|
| 133 | (setf (descriptor-ref desc-ptr+2)
|
|---|
| 134 | (dpb (the fixnum (ldb whole-index-high-byte string-ptr))
|
|---|
| 135 | stored-index-high-byte
|
|---|
| 136 | 0))
|
|---|
| 137 | (add-flags desc-ptr+2 line word-end)
|
|---|
| 138 | (string-table-replace line string-ptr word-end))
|
|---|
| 139 | t)
|
|---|
| 140 |
|
|---|
| 141 | (defun add-flags (loc line word-end)
|
|---|
| 142 | (declare (simple-string line) (fixnum word-end))
|
|---|
| 143 | (do ((flag (1+ word-end) (+ 2 flag))
|
|---|
| 144 | (line-end (length line)))
|
|---|
| 145 | ((>= flag line-end))
|
|---|
| 146 | (declare (fixnum flag line-end))
|
|---|
| 147 | (let ((flag-mask (flag-mask (schar line flag))))
|
|---|
| 148 | (declare (fixnum flag-mask))
|
|---|
| 149 | (unless (zerop flag-mask)
|
|---|
| 150 | (setf (descriptor-ref loc)
|
|---|
| 151 | (logior flag-mask (descriptor-ref loc)))))))
|
|---|
| 152 |
|
|---|
| 153 | ;;; SPELL-REMOVE-ENTRY destructively uppercases entry in removing it from
|
|---|
| 154 | ;;; the dictionary. First entry is looked up, and if it is found due to a
|
|---|
| 155 | ;;; flag, the flag is cleared in the descriptor table. If entry is a root
|
|---|
| 156 | ;;; word in the dictionary (that is, looked up without the use of a flag),
|
|---|
| 157 | ;;; then the root and all its derivitives are deleted by setting its
|
|---|
| 158 | ;;; dictionary location to spell-deleted-entry.
|
|---|
| 159 | ;;;
|
|---|
| 160 | (defun spell-remove-entry (entry)
|
|---|
| 161 | "Removes entry from the dictionary, so it will be an unknown word. Entry
|
|---|
| 162 | is a simple string and is destructively modified. If entry is a root
|
|---|
| 163 | word, then all words derived with entry and its flags will also be deleted."
|
|---|
| 164 | (declare (simple-string entry))
|
|---|
| 165 | (nstring-upcase entry)
|
|---|
| 166 | (let ((entry-len (length entry)))
|
|---|
| 167 | (declare (fixnum entry-len))
|
|---|
| 168 | (when (<= 2 entry-len max-entry-length)
|
|---|
| 169 | (multiple-value-bind (index flagp)
|
|---|
| 170 | (spell-try-word entry entry-len)
|
|---|
| 171 | (when index
|
|---|
| 172 | (if flagp
|
|---|
| 173 | (setf (descriptor-ref (+ 2 index))
|
|---|
| 174 | (logandc2 (descriptor-ref (+ 2 index)) flagp))
|
|---|
| 175 | (let* ((hash (string-hash entry entry-len))
|
|---|
| 176 | (hash-and-len (dpb (the fixnum (ldb new-hash-byte hash))
|
|---|
| 177 | stored-hash-byte
|
|---|
| 178 | (the fixnum entry-len)))
|
|---|
| 179 | (loc (rem hash (the fixnum *dictionary-size*)))
|
|---|
| 180 | (loc-contents (dictionary-ref loc)))
|
|---|
| 181 | (declare (fixnum hash hash-and-len loc))
|
|---|
| 182 | (cond ((zerop loc-contents) nil)
|
|---|
| 183 | ((found-entry-p loc-contents entry entry-len hash-and-len)
|
|---|
| 184 | (setf (dictionary-ref loc) spell-deleted-entry))
|
|---|
| 185 | (t
|
|---|
| 186 | (hash2-loop (loop-loc loc-contents) loc hash
|
|---|
| 187 | nil
|
|---|
| 188 | (when (found-entry-p loc-contents entry
|
|---|
| 189 | entry-len hash-and-len)
|
|---|
| 190 | (setf (dictionary-ref loop-loc)
|
|---|
| 191 | spell-deleted-entry)
|
|---|
| 192 | (return spell-deleted-entry))))))))))))
|
|---|
| 193 |
|
|---|
| 194 | (defun spell-root-flags (index)
|
|---|
| 195 | "Return the flags associated with the root word corresponding to a
|
|---|
| 196 | dictionary entry at index."
|
|---|
| 197 | (let ((desc-word (descriptor-ref (+ 2 index)))
|
|---|
| 198 | (result ()))
|
|---|
| 199 | (declare (fixnum desc-word))
|
|---|
| 200 | (dolist (ele flag-names-to-masks result)
|
|---|
| 201 | (unless (zerop (logand (the fixnum (cdr ele)) desc-word))
|
|---|
| 202 | (push (car ele) result)))))
|
|---|
| 203 |
|
|---|
| 204 |
|
|---|
| 205 | |
|---|
| 206 |
|
|---|
| 207 | ;;;; Growing Dictionary Structures
|
|---|
| 208 |
|
|---|
| 209 | ;;; GROW-DESCRIPTORS grows the descriptors vector by 10%.
|
|---|
| 210 | ;;;
|
|---|
| 211 | (defun grow-descriptors ()
|
|---|
| 212 | (let* ((old-size (+ (the fixnum *descriptors-size*)
|
|---|
| 213 | (the fixnum *free-descriptor-elements*)))
|
|---|
| 214 | (new-size (truncate (* old-size 1.1)))
|
|---|
| 215 | (new-bytes (* new-size 2))
|
|---|
| 216 | (new-sap (allocate-bytes new-bytes)))
|
|---|
| 217 | (declare (fixnum new-size old-size))
|
|---|
| 218 | (sap-replace new-sap *descriptors* 0 0
|
|---|
| 219 | (* 2 (the fixnum *descriptors-size*)))
|
|---|
| 220 | (deallocate-bytes (system-address *descriptors*) (* 2 old-size))
|
|---|
| 221 | (setf *free-descriptor-elements*
|
|---|
| 222 | (- new-size (the fixnum *descriptors-size*)))
|
|---|
| 223 | (setf *descriptors* new-sap)))
|
|---|
| 224 |
|
|---|
| 225 | ;;; GROW-STRING-TABLE grows the string table by 10%.
|
|---|
| 226 | ;;;
|
|---|
| 227 | (defun grow-string-table ()
|
|---|
| 228 | (let* ((old-size (+ (the fixnum *string-table-size*)
|
|---|
| 229 | (the fixnum *free-string-table-bytes*)))
|
|---|
| 230 | (new-size (truncate (* old-size 1.1)))
|
|---|
| 231 | (new-sap (allocate-bytes new-size)))
|
|---|
| 232 | (declare (fixnum new-size old-size))
|
|---|
| 233 | (sap-replace new-sap *string-table* 0 0 *string-table-size*)
|
|---|
| 234 | (setf *free-string-table-bytes*
|
|---|
| 235 | (- new-size (the fixnum *string-table-size*)))
|
|---|
| 236 | (deallocate-bytes (system-address *string-table*) old-size)
|
|---|
| 237 | (setf *string-table* new-sap)))
|
|---|