source: release/1.11/source/cocoa-ide/hemlock/unused/archive/spell/build.lisp

Last change on this file was 6, checked in by Gary Byers, 21 years ago

Initial revision

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.0 KB
Line 
1;;; -*- Log: hemlock.log; Package: Spell -*-
2;;;
3;;; **********************************************************************
4;;; This code was written as part of the CMU Common Lisp project at
5;;; Carnegie Mellon University, and has been placed in the public domain.
6;;;
7;;; **********************************************************************
8;;;
9;;; Written by Bill Chiles
10;;; Designed by Bill Chiles and Rob Maclachlan
11;;;
12
13;;; This file contains code to build a new binary dictionary file from
14;;; text in system space. This code relies on implementation dependent
15;;; code from spell-rt.lisp. Also, it is expected that spell-corr.lisp
16;;; and spell-aug.lisp have been loaded. In order to compile this file,
17;;; you must first compile spell-rt, spell-corr.lisp, and spell-aug.lisp.
18
19;;; The text file must be in the following format:
20;;; entry1/flag1/flag2/flag3
21;;; entry2
22;;; entry3/flag1/flag2/flag3/flag4/flag5.
23;;; The flags are single letter indicators of legal suffixes for the entry;
24;;; the available flags and their correct use may be found at the beginning
25;;; of spell-corr.lisp in the Hemlock sources. There must be exactly one
26;;; entry per line, and each line must be flushleft.
27
28
29(in-package "SPELL")
30
31;;; An interesting value when building an initial dictionary.
32(defvar *collision-count* 0)
33
34(defvar *new-dictionary*)
35(defvar *new-descriptors*)
36(defvar *new-string-table*)
37
38(declaim (optimize (debug 3)))
39
40
41
42;;;; Constants
43
44;;; This is an upper bound estimate of the number of stored entries in the
45;;; dictionary. It should not be more than 21,845 because the dictionary
46;;; is a vector of type '(unsigned-byte 16), and the descriptors' vector
47;;; for the entries uses three '(unsigned-byte 16) elements per descriptor
48;;; unit. See the beginning of Spell-Correct.Lisp.
49;;;
50(eval-when (:compile-toplevel :load-toplevel :execute)
51
52(defconstant +max-entry-count-estimate+ 15600)
53
54(defconstant +new-dictionary-size+ 20011)
55
56(defconstant +new-descriptors-size+ (1+ +max-entry-count-estimate+))
57
58(defconstant +max-string-table-length+ (* 10 +max-entry-count-estimate+))
59
60); eval-when
61
62
63
64;;;; Hashing
65
66;;; These hashing macros are different from the ones in Spell-Correct.Lisp
67;;; simply because we are using separate space and global specials/constants.
68;;; Of course, they should be identical, but it doesn't seem worth cluttering
69;;; up Spell-Correct with macro generating macros for this file.
70
71;;; Well, we've made them functions now. we should really clean up the
72;;; other macros mentioned above by merging them with these
73
74(declaim (inline hash-increment handle-collision get-hash-index))
75(defun hash-increment (hash size)
76 (- size 2 (rem hash (- size 2))))
77
78(defun handle-collision (descriptor-table hash location)
79 (do* ((incr (hash-increment hash +new-dictionary-size+))
80 (collide-location (rem (+ location incr)
81 +new-dictionary-size+)
82 (rem (+ collide-location incr)
83 +new-dictionary-size+)))
84 ;; if we've found our way back to where we started, there are
85 ;; no free slots available. indicate failure.
86 ((= collide-location location) nil)
87 (when (zerop (aref descriptor-table collide-location))
88 (return-from handle-collision collide-location))))
89
90(defun get-hash-index (descriptor-table entry entry-length)
91 "Finds a suitable position in DESCRIPTOR-TABLE for ENTRY.
92 Returns NIL if one cannot be located."
93 (let* ((hash (string-hash entry entry-length))
94 (location (rem hash +new-dictionary-size+)))
95 (cond
96 ((not (zerop (aref descriptor-table location)))
97 ;; crud. the desirable spot was already taken. hunt for another
98 (incf *collision-count*)
99 (handle-collision descriptor-table hash location))
100 (t location))))
101
102
103
104;;;; Build-Dictionary
105
106(defun build-dictionary (input output)
107 (let* ((descriptors (make-array +new-descriptors-size+))
108 (string-table (make-string +max-string-table-length+))
109 (descriptor-table (make-array +new-dictionary-size+
110 :element-type '(unsigned-byte 16)))
111 (new-dictionary (make-instance 'dictionary
112 :string-table string-table
113 :descriptors descriptors
114 :descriptor-table descriptor-table)))
115 (write-line "Reading dictionary ...")
116 (force-output)
117 (setf *collision-count* 0)
118 (multiple-value-bind (entry-count string-table-length)
119 (read-initial-dictionary input descriptor-table
120 descriptors string-table)
121 (write-line "Writing dictionary ...")
122 (force-output)
123 (write-dictionary output new-dictionary entry-count string-table-length)
124 (format t "~D entries processed with ~D collisions."
125 entry-count *collision-count*)
126 new-dictionary)))
127
128(defun read-initial-dictionary (f dictionary descriptors string-table)
129 (let* ((filename (pathname f))
130 (s (open filename :direction :input :if-does-not-exist nil)))
131 (unless s (error "File ~S does not exist." f))
132 (multiple-value-prog1
133 (let ((descriptor-ptr 1)
134 (string-ptr 0)
135 (entry-count 0))
136 (declare (fixnum descriptor-ptr string-ptr entry-count))
137 (loop (multiple-value-bind (line eofp) (read-line s nil nil)
138 (declare (type (or null simple-string) line))
139 (unless line (return (values entry-count string-ptr)))
140 (incf entry-count)
141 (when (> entry-count +max-entry-count-estimate+)
142 (error "There are too many entries in text file!~%~
143 Please change constants in spell-build.lisp, ~
144 recompile the file, and reload it.~%~
145 Be sure to understand the constraints of permissible ~
146 values."))
147 (let ((flags (or (position #\/ line :test #'char=)
148 (length line))))
149 (declare (fixnum flags))
150 (cond ((> flags +max-entry-length+)
151 (format t "Entry ~s too long." (subseq line 0 flags))
152 (force-output))
153 (t (let ((new-string-ptr (+ string-ptr flags)))
154 (declare (fixnum new-string-ptr))
155 (when (> new-string-ptr +max-string-table-length+)
156 (error "Spell string table overflow!~%~
157 Please change constants in ~
158 spell-build.lisp, recompile the file, ~
159 and reload it.~%~
160 Be sure to understand the constraints ~
161 of permissible values."))
162 (spell-place-entry line flags
163 dictionary descriptors string-table
164 descriptor-ptr string-ptr)
165 (incf descriptor-ptr)
166 (setf string-ptr new-string-ptr)))))
167 (when eofp (return (values entry-count string-ptr))))))
168 (close s))))
169
170(defun word-flags (line word-end)
171 (declare (simple-string line) (fixnum word-end))
172 (let ((word-flags 0))
173 (do ((flag (1+ word-end) (+ 2 flag))
174 (line-end (length line)))
175 ((>= flag line-end) word-flags)
176 (declare (fixnum flag line-end))
177 (let ((flag-mask (flag-mask (schar line flag))))
178 (declare (fixnum flag-mask))
179 (if (zerop flag-mask)
180 (format t "Illegal flag ~S on word ~S."
181 (schar line flag) (subseq line 0 word-end))
182 (setf word-flags
183 (logior flag-mask word-flags)))))))
184
185(defun spell-place-entry (line word-end dictionary descriptors string-table
186 descriptor-ptr string-ptr)
187 (declare (simple-string line string-table)
188 (fixnum word-end descriptor-ptr string-ptr))
189 (nstring-upcase line :end word-end)
190 (let* ((hash-loc (get-hash-index dictionary line word-end)))
191 (unless hash-loc (error "Dictionary Overflow!"))
192 (setf (aref dictionary hash-loc) descriptor-ptr)
193 (let* ((hash-code (ldb +new-hash-byte+
194 (string-hash line word-end)))
195 (descriptor (make-descriptor :hash-code hash-code
196 :length word-end
197 :string-index string-ptr)))
198 (setf (desc-flags descriptor) (word-flags line word-end)
199 (aref descriptors descriptor-ptr) descriptor)
200 (replace string-table line :start1 string-ptr :end2 word-end))))
Note: See TracBrowser for help on using the repository browser.