| 1 | ; -*- Log: hemlock.log; Package: Spell -*-
|
|---|
| 2 | ;;;
|
|---|
| 3 | ;;; **********************************************************************
|
|---|
| 4 | ;;; This code was written as part of the CMU Common Lisp project at
|
|---|
| 5 | ;;; Carnegie Mellon University, and has been placed in the public domain.
|
|---|
| 6 | ;;;
|
|---|
| 7 | ;;; **********************************************************************
|
|---|
| 8 | ;;;
|
|---|
| 9 | ;;; Written by Bill Chiles
|
|---|
| 10 | ;;; Designed by Bill Chiles and Rob Maclachlan
|
|---|
| 11 | ;;;
|
|---|
| 12 | ;;; This file contains the code to grow the spelling dictionary in system
|
|---|
| 13 | ;;; space by reading a text file of entries or adding one at a time. This
|
|---|
| 14 | ;;; code relies on implementation dependent code found in Spell-RT.Lisp.
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 | (in-package "SPELL")
|
|---|
| 18 |
|
|---|
| 19 | |
|---|
| 20 |
|
|---|
| 21 | ;;;; String and Hashing Macros
|
|---|
| 22 |
|
|---|
| 23 | (eval-when (:compile-toplevel :execute)
|
|---|
| 24 |
|
|---|
| 25 | (defmacro string-table-replace (src-string dst-start length)
|
|---|
| 26 | `(sap-replace *string-table* ,src-string 0 ,dst-start (+ ,dst-start ,length)))
|
|---|
| 27 |
|
|---|
| 28 | ;;; HASH-ENTRY is used in SPELL-ADD-ENTRY to find a dictionary location for
|
|---|
| 29 | ;;; adding a new entry. If a location contains a zero, then it has never been
|
|---|
| 30 | ;;; used, and no entries have ever been "hashed past" it. If a location
|
|---|
| 31 | ;;; contains SPELL-DELETED-ENTRY, then it once contained an entry that has
|
|---|
| 32 | ;;; since been deleted.
|
|---|
| 33 | ;;;
|
|---|
| 34 | (defmacro hash-entry (entry entry-len)
|
|---|
| 35 | (let ((loop-loc (gensym)) (loc-contents (gensym))
|
|---|
| 36 | (hash (gensym)) (loc (gensym)))
|
|---|
| 37 | `(let* ((,hash (string-hash ,entry ,entry-len))
|
|---|
| 38 | (,loc (rem ,hash (the fixnum *dictionary-size*)))
|
|---|
| 39 | (,loc-contents (dictionary-ref ,loc)))
|
|---|
| 40 | (declare (fixnum ,loc ,loc-contents))
|
|---|
| 41 | (if (or (zerop ,loc-contents) (= ,loc-contents spell-deleted-entry))
|
|---|
| 42 | ,loc
|
|---|
| 43 | (hash2-loop (,loop-loc ,loc-contents) ,loc ,hash
|
|---|
| 44 | ,loop-loc nil t)))))
|
|---|
| 45 |
|
|---|
| 46 | ) ;eval-when
|
|---|
| 47 |
|
|---|
| 48 |
|
|---|
| 49 | |
|---|
| 50 |
|
|---|
| 51 | ;;;; Top Level Stuff
|
|---|
| 52 |
|
|---|
| 53 | (defun spell-read-dictionary (dictionary filename)
|
|---|
| 54 | "Add entries to DICTIONARY from lines in the file FILENAME."
|
|---|
| 55 | (with-open-file (s filename :direction :input)
|
|---|
| 56 | (loop (multiple-value-bind (entry eofp) (read-line s nil nil)
|
|---|
| 57 | (declare (type (or simple-string null) entry))
|
|---|
| 58 | (unless entry (return))
|
|---|
| 59 | (spell-add-entry entry)
|
|---|
| 60 | (if eofp (return))))))
|
|---|
| 61 |
|
|---|
| 62 |
|
|---|
| 63 | (defun spell-add-entry (dictionary line &optional
|
|---|
| 64 | (word-end (or (position #\/ line :test #'char=)
|
|---|
| 65 | (length line))))
|
|---|
| 66 | "Line is of the form \"entry/flag1/flag2\" or \"entry\". It is parsed and
|
|---|
| 67 | added to the spelling dictionary. Line is destructively modified."
|
|---|
| 68 | (declare (simple-string line) (fixnum word-end))
|
|---|
| 69 | (nstring-upcase line :end word-end)
|
|---|
| 70 | (when (> word-end max-entry-length)
|
|---|
| 71 | (return-from spell-add-entry nil))
|
|---|
| 72 | (let ((entry (lookup-entry line word-end)))
|
|---|
| 73 | (when entry
|
|---|
| 74 | (add-flags (+ entry 2) line word-end)
|
|---|
| 75 | (return-from spell-add-entry nil)))
|
|---|
| 76 | (let* ((hash-loc (hash-entry line word-end))
|
|---|
| 77 | (string-ptr *string-table-size*)
|
|---|
| 78 | (desc-ptr *descriptors-size*)
|
|---|
| 79 | (desc-ptr+1 (1+ desc-ptr))
|
|---|
| 80 | (desc-ptr+2 (1+ desc-ptr+1)))
|
|---|
| 81 | (declare (fixnum string-ptr))
|
|---|
| 82 | (when (not hash-loc) (error "Dictionary Overflow!"))
|
|---|
| 83 | (when (> 3 *free-descriptor-elements*) (grow-descriptors))
|
|---|
| 84 | (when (> word-end *free-string-table-bytes*) (grow-string-table))
|
|---|
| 85 | (decf *free-descriptor-elements* 3)
|
|---|
| 86 | (incf *descriptors-size* 3)
|
|---|
| 87 | (decf *free-string-table-bytes* word-end)
|
|---|
| 88 | (incf *string-table-size* word-end)
|
|---|
| 89 | (setf (dictionary-ref hash-loc) desc-ptr)
|
|---|
| 90 | (let ((desc (make-descriptor :hash-code (ldb new-hash-byte
|
|---|
| 91 | (string-hash line word-end))
|
|---|
| 92 | :length word-end
|
|---|
| 93 | :string-index string-ptr
|
|---|
| 94 | :flags (word-flags line word-end))))
|
|---|
| 95 | (add-flags desc-ptr+2 line word-end)
|
|---|
| 96 | (string-table-replace line string-ptr word-end))
|
|---|
| 97 | t)
|
|---|
| 98 |
|
|---|
| 99 | ;;; SPELL-REMOVE-ENTRY destructively uppercases entry in removing it from
|
|---|
| 100 | ;;; the dictionary. First entry is looked up, and if it is found due to a
|
|---|
| 101 | ;;; flag, the flag is cleared in the descriptor table. If entry is a root
|
|---|
| 102 | ;;; word in the dictionary (that is, looked up without the use of a flag),
|
|---|
| 103 | ;;; then the root and all its derivitives are deleted by setting its
|
|---|
| 104 | ;;; dictionary location to spell-deleted-entry.
|
|---|
| 105 | ;;;
|
|---|
| 106 | (defun spell-remove-entry (dictionary entry)
|
|---|
| 107 | "Removes ENTRY from DICTIONARY, so it will be an unknown word. Entry
|
|---|
| 108 | is a simple string and is destructively modified. If entry is a root
|
|---|
| 109 | word, then all words derived with entry and its flags will also be deleted."
|
|---|
| 110 | (declare (simple-string entry))
|
|---|
| 111 | (nstring-upcase entry)
|
|---|
| 112 | (let ((entry-len (length entry)))
|
|---|
| 113 | (declare (fixnum entry-len))
|
|---|
| 114 | (when (<= 2 entry-len max-entry-length)
|
|---|
| 115 | (multiple-value-bind (index flagp)
|
|---|
| 116 | (spell-try-word entry entry-len)
|
|---|
| 117 | (when index
|
|---|
| 118 | (if flagp
|
|---|
| 119 | (setf (descriptor-ref (+ 2 index))
|
|---|
| 120 | (logandc2 (descriptor-ref (+ 2 index)) flagp))
|
|---|
| 121 | (let* ((hash (string-hash entry entry-len))
|
|---|
| 122 | (hash-and-len (dpb (the fixnum (ldb new-hash-byte hash))
|
|---|
| 123 | stored-hash-byte
|
|---|
| 124 | (the fixnum entry-len)))
|
|---|
| 125 | (loc (rem hash (the fixnum *dictionary-size*)))
|
|---|
| 126 | (loc-contents (dictionary-ref loc)))
|
|---|
| 127 | (declare (fixnum hash hash-and-len loc))
|
|---|
| 128 | (cond ((zerop loc-contents) nil)
|
|---|
| 129 | ((found-entry-p loc-contents entry entry-len hash-and-len)
|
|---|
| 130 | (setf (dictionary-ref loc) spell-deleted-entry))
|
|---|
| 131 | (t
|
|---|
| 132 | (hash2-loop (loop-loc loc-contents) loc hash
|
|---|
| 133 | nil
|
|---|
| 134 | (when (found-entry-p loc-contents entry
|
|---|
| 135 | entry-len hash-and-len)
|
|---|
| 136 | (setf (dictionary-ref loop-loc)
|
|---|
| 137 | spell-deleted-entry)
|
|---|
| 138 | (return spell-deleted-entry))))))))))))
|
|---|
| 139 |
|
|---|
| 140 | (defun spell-root-flags (dictionary index)
|
|---|
| 141 | "Return the flags associated with the root word corresponding to a
|
|---|
| 142 | dictionary entry at index."
|
|---|
| 143 | (let* ((descriptor (descriptor-ref dictionary index))
|
|---|
| 144 | (desc-flags (desc-flags descriptor)))
|
|---|
| 145 | (loop for element in flag-names-to-masks
|
|---|
| 146 | unless (zerop (logand (cdr element) desc-flags))
|
|---|
| 147 | collect (car element))))
|
|---|
| 148 |
|
|---|
| 149 | |
|---|
| 150 |
|
|---|
| 151 | ;;;; Growing Dictionary Structures
|
|---|
| 152 |
|
|---|
| 153 | ;;; GROW-DESCRIPTORS grows the descriptors vector by 10%.
|
|---|
| 154 | ;;;
|
|---|
| 155 | (defun grow-descriptors (dictionary)
|
|---|
| 156 | (let* ((old-size (+ (the fixnum *descriptors-size*)
|
|---|
| 157 | (the fixnum *free-descriptor-elements*)))
|
|---|
| 158 | (new-size (truncate (* old-size 1.1)))
|
|---|
| 159 | (new-bytes (* new-size 2))
|
|---|
| 160 | (new-sap (allocate-bytes new-bytes)))
|
|---|
| 161 | (declare (fixnum new-size old-size))
|
|---|
| 162 | (sap-replace new-sap *descriptors* 0 0
|
|---|
| 163 | (* 2 (the fixnum *descriptors-size*)))
|
|---|
| 164 | (deallocate-bytes (system-address *descriptors*) (* 2 old-size))
|
|---|
| 165 | (setf *free-descriptor-elements*
|
|---|
| 166 | (- new-size (the fixnum *descriptors-size*)))
|
|---|
| 167 | (setf *descriptors* new-sap)))
|
|---|
| 168 |
|
|---|
| 169 | ;;; GROW-STRING-TABLE grows the string table by 10%.
|
|---|
| 170 | ;;;
|
|---|
| 171 | (defun grow-string-table (dictionary)
|
|---|
| 172 | (let* ((old-size (+ (the fixnum *string-table-size*)
|
|---|
| 173 | (the fixnum *free-string-table-bytes*)))
|
|---|
| 174 | (new-size (truncate (* old-size 1.1)))
|
|---|
| 175 | (new-sap (allocate-bytes new-size)))
|
|---|
| 176 | (declare (fixnum new-size old-size))
|
|---|
| 177 | (sap-replace new-sap *string-table* 0 0 *string-table-size*)
|
|---|
| 178 | (setf *free-string-table-bytes*
|
|---|
| 179 | (- new-size (the fixnum *string-table-size*)))
|
|---|
| 180 | (deallocate-bytes (system-address *string-table*) old-size)
|
|---|
| 181 | (setf *string-table* new-sap)))
|
|---|