source: release/1.7/source/cocoa-ide/hemlock/unused/archive/spell/correlate.lisp

Last change on this file was 6, checked in by Gary Byers, 21 years ago

Initial revision

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 26.3 KB
Line 
1;;; -*- Log: hemlock.log; Package: Spell -*-
2;;;
3;;; **********************************************************************
4;;; This code was written as part of the CMU Common Lisp project at
5;;; Carnegie Mellon University, and has been placed in the public domain.
6;;;
7;;; **********************************************************************
8;;;
9;;; Written by Bill Chiles
10;;; Designed by Bill Chiles and Rob Maclachlan
11;;;
12
13;;; This is the file that deals with checking and correcting words
14;;; using a dictionary read in from a binary file. It has been written
15;;; from the basic ideas used in Ispell (on DEC-20's) which originated as
16;;; Spell on the ITS machines at MIT. There are flags which have proper
17;;; uses defined for them that indicate permissible suffixes to entries.
18;;; This allows for about three times as many known words than are actually
19;;; stored. When checking the spelling of a word, first it is looked up;
20;;; if this fails, then possible roots are looked up, and if any has the
21;;; appropriate suffix flag, then the word is considered to be correctly
22;;; spelled. For an unknown word, the following rules define "close" words
23;;; which are possible corrections:
24;;; 1] two adjacent letters are transposed to form a correct spelling;
25;;; 2] one letter is changed to form a correct spelling;
26;;; 3] one letter is added to form a correct spelling; and/or
27;;; 4] one letter is removed to form a correct spelling.
28;;; There are two restrictions on the length of a word in regards to its
29;;; worthiness of recognition: it must be at least more than two letters
30;;; long, and if it has a suffix, then it must be at least four letters
31;;; long. More will be said about this when the flags are discussed.
32;;; This is implemented in as tense a fashion as possible, and it uses
33;;; implementation dependent code from Spell-RT.Lisp to accomplish this.
34;;; In general the file I/O and structure accesses encompass the system
35;;; dependencies.
36
37;;; This next section will discuss the storage of the dictionary
38;;; information. There are three data structures that "are" the
39;;; dictionary: a hash table, descriptors table, and a string table. The
40;;; hash table is a vector of type '(unsigned-byte 16), whose elements
41;;; point into the descriptors table. This is a cyclic hash table to
42;;; facilitate dumping it to a file. The descriptors table (also of type
43;;; '(unsigned-byte 16)) dedicates three elements to each entry in the
44;;; dictionary. Each group of three elements has the following organization
45;;; imposed on them:
46;;; ----------------------------------------------
47;;; | 15..5 hash code | 4..0 length |
48;;; ----------------------------------------------
49;;; | 15..0 character index |
50;;; ----------------------------------------------
51;;; | 15..14 character index | 13..0 flags |
52;;; ----------------------------------------------
53;;; "Length" is the number of characters in the entry; "hash code" is some
54;;; eleven bits from the hash code to allow for quicker lookup, "flags"
55;;; indicate possible suffixes for the basic entry, and "character index"
56;;; is the index of the start of the entry in the string table.
57;;; This was originally adopted due to the Perq's word size (can you guess?
58;;; 16 bits, that's right). Note the constraint that is placed on the number
59;;; of the entries, 21845, because the hash table could not point to more
60;;; descriptor units (16 bits of pointer divided by three). Since a value of
61;;; zero as a hash table element indicates an empty location, the zeroth element
62;;; of the descriptors table must be unused (it cannot be pointed to).
63
64
65;;; The following is a short discussion with examples of the correct
66;;; use of the suffix flags. Let # and @ be symbols that can stand for any
67;;; single letter. Upper case letters are constants. "..." stands for any
68;;; string of zero or more letters, but note that no word may exist in the
69;;; dictionary which is not at least 2 letters long, so, for example, FLY
70;;; may not be produced by placing the "Y" flag on "F". Also, no flag is
71;;; effective unless the word that it creates is at least 4 letters long,
72;;; so, for example, WED may not be produced by placing the "D" flag on
73;;; "WE". These flags and examples are from the Ispell documentation with
74;;; only slight modifications. Here are the correct uses of the flags:
75;;;
76;;; "V" flag:
77;;; ...E => ...IVE as in create => creative
78;;; if # .ne. E, then ...# => ...#IVE as in prevent => preventive
79;;;
80;;; "N" flag:
81;;; ...E => ...ION as in create => creation
82;;; ...Y => ...ICATION as in multiply => multiplication
83;;; if # .ne. E or Y, then ...# => ...#EN as in fall => fallen
84;;;
85;;; "X" flag:
86;;; ...E => ...IONS as in create => creations
87;;; ...Y => ...ICATIONS as in multiply => multiplications
88;;; if # .ne. E or Y, ...# => ...#ENS as in weak => weakens
89;;;
90;;; "H" flag:
91;;; ...Y => ...IETH as in twenty => twentieth
92;;; if # .ne. Y, then ...# => ...#TH as in hundred => hundredth
93;;;
94;;; "Y" FLAG:
95;;; ... => ...LY as in quick => quickly
96;;;
97;;; "G" FLAG:
98;;; ...E => ...ING as in file => filing
99;;; if # .ne. E, then ...# => ...#ING as in cross => crossing
100;;;
101;;; "J" FLAG"
102;;; ...E => ...INGS as in file => filings
103;;; if # .ne. E, then ...# => ...#INGS as in cross => crossings
104;;;
105;;; "D" FLAG:
106;;; ...E => ...ED as in create => created
107;;; if @ .ne. A, E, I, O, or U,
108;;; then ...@Y => ...@IED as in imply => implied
109;;; if # = Y, and @ = A, E, I, O, or U,
110;;; then ...@# => ...@#ED as in convey => conveyed
111;;; if # .ne. E or Y, then ...# => ...#ED as in cross => crossed
112;;;
113;;; "T" FLAG:
114;;; ...E => ...EST as in late => latest
115;;; if @ .ne. A, E, I, O, or U,
116;;; then ...@Y => ...@IEST as in dirty => dirtiest
117;;; if # = Y, and @ = A, E, I, O, or U,
118;;; then ...@# => ...@#EST as in gray => grayest
119;;; if # .ne. E or Y, then ...# => ...#EST as in small => smallest
120;;;
121;;; "R" FLAG:
122;;; ...E => ...ER as in skate => skater
123;;; if @ .ne. A, E, I, O, or U,
124;;; then ...@Y => ...@IER as in multiply => multiplier
125;;; if # = Y, and @ = A, E, I, O, or U,
126;;; then ...@# => ...@#ER as in convey => conveyer
127;;; if # .ne. E or Y, then ...# => ...#ER as in build => builder
128;;;
129
130;;; "Z FLAG:
131;;; ...E => ...ERS as in skate => skaters
132;;; if @ .ne. A, E, I, O, or U,
133;;; then ...@Y => ...@IERS as in multiply => multipliers
134;;; if # = Y, and @ = A, E, I, O, or U,
135;;; then ...@# => ...@#ERS as in slay => slayers
136;;; if # .ne. E or Y, then ...@# => ...@#ERS as in build => builders
137;;;
138;;; "S" FLAG:
139;;; if @ .ne. A, E, I, O, or U,
140;;; then ...@Y => ...@IES as in imply => implies
141;;; if # .eq. S, X, Z, or H,
142;;; then ...# => ...#ES as in fix => fixes
143;;; if # .ne. S, X, Z, H, or Y,
144;;; then ...# => ...#S as in bat => bats
145;;; if # = Y, and @ = A, E, I, O, or U,
146;;; then ...@# => ...@#S as in convey => conveys
147;;;
148;;; "P" FLAG:
149;;; if # .ne. Y, or @ = A, E, I, O, or U,
150;;; then ...@# => ...@#NESS as in late => lateness and
151;;; gray => grayness
152;;; if @ .ne. A, E, I, O, or U,
153;;; then ...@Y => ...@INESS as in cloudy => cloudiness
154;;;
155;;; "M" FLAG:
156;;; ... => ...'S as in DOG => DOG'S
157
158(in-package "SPELL")
159
160
161
162;;;; Some Specials and Accesses
163
164;;; *spell-aeiou* will have bits on that represent the capital letters
165;;; A, E, I, O, and U to be used to determine if some word roots are legal
166;;; for looking up.
167;;;
168(defvar *aeiou*
169 (make-array 128 :element-type 'bit :initial-element 0))
170
171(setf (aref *aeiou* (char-code #\A)) 1)
172(setf (aref *aeiou* (char-code #\E)) 1)
173(setf (aref *aeiou* (char-code #\I)) 1)
174(setf (aref *aeiou* (char-code #\O)) 1)
175(setf (aref *aeiou* (char-code #\U)) 1)
176
177
178;;; *sxzh* will have bits on that represent the capital letters
179;;; S, X, Z, and H to be used to determine if some word roots are legal for
180;;; looking up.
181;;;
182(defvar *sxzh*
183 (make-array 128 :element-type 'bit :initial-element 0))
184
185(setf (aref *sxzh* (char-code #\S)) 1)
186(setf (aref *sxzh* (char-code #\X)) 1)
187(setf (aref *sxzh* (char-code #\Z)) 1)
188(setf (aref *sxzh* (char-code #\H)) 1)
189
190
191;;; SET-MEMBER-P will be used with *aeiou* and *sxzh* to determine if a
192;;; character is in the specified set.
193;;;
194(declaim (inline set-member-p))
195(defun set-member-p (char set)
196 (not (zerop (the fixnum (aref (the simple-bit-vector set)
197 (char-code char))))))
198
199;;; DESC-TABLE-REF and DESCRIPTOR-REF are references to implementation
200;;; dependent structures.
201;;;
202(declaim (inline desc-table-ref descriptor-ref))
203(defun desc-table-ref (dictionary index)
204 (aref (descriptor-table dictionary) index))
205(defun %set-desc-table-ref (dictionary index value)
206 (setf (aref (descriptor-table dictionary) index) value))
207
208(defsetf desc-table-ref %set-desc-table-ref)
209
210(defun descriptor-ref (dictionary index)
211 (aref (descriptors dictionary) index))
212
213
214;;; DESCRIPTOR-STRING-START access an entry's (indicated by idx)
215;;; descriptor unit (described at the beginning of the file) and returns
216;;; the start index of the entry in the string table. The second of three
217;;; words in the descriptor holds the 16 least significant bits of 18, and
218;;; the top two bits of the third word are the 2 most significant bits.
219;;; These 18 bits are the index into the string table.
220;;;
221(defun descriptor-string-start (dictionary index)
222 (desc-string-index (descriptor-ref dictionary index)))
223
224
225
226;;;; Top level Checking/Correcting
227
228;;; CORRECT-SPELLING can be called from top level to check/correct a words
229;;; spelling. It is not used for any other purpose.
230;;;
231(defun correct-spelling (dictionary word)
232 "Check/correct the spelling of word. Output is done to *standard-output*."
233 (setf word (coerce word 'simple-string))
234 (let ((word (string-upcase (the simple-string word)))
235 (word-len (length (the simple-string word))))
236 (declare (simple-string word) (fixnum word-len))
237 (when (= word-len 1)
238 (error "Single character words are not in the dictionary."))
239 (when (> word-len +max-entry-length+)
240 (error "~A is too long for the dictionary." word))
241 (multiple-value-bind (idx used-flag-p)
242 (spell-try-word dictionary word word-len)
243 (if idx
244 (format t "Found it~:[~; because of ~A~]." used-flag-p
245 (spell-root-word dictionary idx))
246 (let ((close-words (spell-collect-close-words dictionary word)))
247 (if close-words
248 (format *standard-output*
249 "The possible correct spelling~[~; is~:;s are~]:~
250 ~:*~[~; ~{~A~}~;~{ ~A~^ and~}~:;~
251 ~{~#[~; and~] ~A~^,~}~]."
252 (length close-words)
253 close-words)
254 (format *standard-output* "Word not found.")))))))
255
256
257(defun spell-root-word (dictionary index)
258 "Return the root word corresponding to a dictionary entry at index."
259 (let* ((descriptor (descriptor-ref dictionary index))
260 (start (desc-string-index descriptor))
261 (len (desc-length descriptor)))
262 (declare (fixnum start len))
263 ;; return a copy
264 (subseq (string-table dictionary) start (+ start len))))
265
266
267;;; SPELL-COLLECT-CLOSE-WORDS Returns a list of all "close" correctly spelled
268;;; words. The definition of "close" is at the beginning of the file, and
269;;; there are four sections to this function which collect each of the four
270;;; different kinds of close words.
271;;;
272(defun spell-collect-close-words (dictionary word)
273 "Returns a list of all \"close\" correctly spelled words. This has the
274 same contraints as SPELL-TRY-WORD, which you have probably already called
275 if you are calling this."
276 (declare (simple-string word))
277 (let* ((word-len (length word))
278 (word-len--1 (1- word-len))
279 (word-len-+1 (1+ word-len))
280 (result ())
281 (correcting-buffer (make-string +max-entry-length+)))
282 (macrolet ((check-closeness (dictionary word word-len closeness-list)
283 `(when (spell-try-word ,dictionary ,word ,word-len)
284 (pushnew (subseq ,word 0 ,word-len)
285 ,closeness-list :test #'string=))))
286 (declare (simple-string correcting-buffer)
287 (fixnum word-len word-len--1 word-len-+1))
288 (replace correcting-buffer word :end1 word-len :end2 word-len)
289
290 ;; Misspelled because one letter is different.
291 (dotimes (i word-len)
292 (do ((save-char (schar correcting-buffer i))
293 (alphabet +spell-alphabet+ (cdr alphabet)))
294 ((null alphabet)
295 (setf (schar correcting-buffer i) save-char))
296 (setf (schar correcting-buffer i) (car alphabet))
297 (check-closeness dictionary correcting-buffer word-len result)))
298
299 ;; Misspelled because two adjacent letters are transposed.
300 (dotimes (i word-len--1)
301 (rotatef (schar correcting-buffer i) (schar correcting-buffer (1+ i)))
302 (check-closeness dictionary correcting-buffer word-len result)
303 (rotatef (schar correcting-buffer i) (schar correcting-buffer (1+ i))))
304
305 ;; Misspelled because of extraneous letter.
306 (replace correcting-buffer word
307 :start2 1 :end1 word-len--1 :end2 word-len)
308 (check-closeness dictionary correcting-buffer word-len--1 result)
309 (dotimes (i word-len--1)
310 (setf (schar correcting-buffer i) (schar word i))
311 (replace correcting-buffer word
312 :start1 (1+ i) :start2 (+ i 2) :end1 word-len--1 :end2 word-len)
313 (check-closeness dictionary correcting-buffer word-len--1 result))
314
315 ;; Misspelled because a letter is missing.
316 (replace correcting-buffer word
317 :start1 1 :end1 word-len-+1 :end2 word-len)
318 (dotimes (i word-len-+1)
319 (do ((alphabet +spell-alphabet+ (cdr alphabet)))
320 ((null alphabet)
321 (rotatef (schar correcting-buffer i)
322 (schar correcting-buffer (1+ i))))
323 (setf (schar correcting-buffer i) (car alphabet))
324 (check-closeness dictionary correcting-buffer word-len-+1 result)))
325 result)))
326
327;;; SPELL-TRY-WORD The literal 4 is not a constant defined somewhere since it
328;;; is part of the definition of the function of looking up words.
329;;; TRY-WORD-ENDINGS relies on the guarantee that word-len is at least 4.
330;;;
331(defun spell-try-word (dictionary word word-len)
332 "See if the word or an appropriate root is in the spelling dicitionary.
333 Word-len must be inclusively in the range 2..max-entry-length."
334 (or (lookup-entry dictionary word word-len)
335 (if (>= (the fixnum word-len) +minimum-try-word-endings-length+)
336 (try-word-endings dictionary word word-len))))
337
338
339
340
341;;;; Divining Correct Spelling
342
343(eval-when (:compile-toplevel :execute)
344
345(defmacro setup-root-buffer (word buffer root-len)
346 `(replace ,buffer ,word :end1 ,root-len :end2 ,root-len))
347
348(defmacro try-root (dictionary word root-len flag-mask)
349 (let ((result (gensym)))
350 `(let ((,result (lookup-entry ,dictionary ,word ,root-len)))
351 (if (and ,result (descriptor-flag ,dictionary ,result ,flag-mask))
352 (return (values ,result ,flag-mask))))))
353
354;;; TRY-MODIFIED-ROOT is used for root words that become truncated
355;;; when suffixes are added (e.g., skate => skating). Char-idx is the last
356;;; character in the root that has to typically be changed from a #\I to a
357;;; #\Y or #\E.
358;;;
359(defmacro try-modified-root (dictionary word buffer
360 root-len flag-mask char-idx new-char)
361 (let ((root-word (gensym)))
362 `(let ((,root-word (setup-root-buffer ,word ,buffer ,root-len)))
363 (setf (schar ,root-word ,char-idx) ,new-char)
364 (try-root ,dictionary ,root-word ,root-len ,flag-mask))))
365
366) ;eval-when
367
368(defvar *rooting-buffer* (make-string +max-entry-length+))
369
370;;; TRY-WORD-ENDINGS takes a word that is at least of length 4 and
371;;; returns multiple values on success (the index where the word's root's
372;;; descriptor starts and :used-flag), otherwise nil. It looks at
373;;; characters from the end to the beginning of the word to determine if it
374;;; has any known suffixes. This is a VERY simple finite state machine
375;;; where all of the suffixes are narrowed down to one possible one in at
376;;; most two state changes. This is a PROG form for speed, and in some sense,
377;;; readability. The states of the machine are the flag names that denote
378;;; suffixes. The two points of branching to labels are the very beginning
379;;; of the PROG and the S state. This is a fairly straight forward
380;;; implementation of the flag rules presented at the beginning of this
381;;; file, with char-idx checks, so we do not index the string below zero.
382
383(defun try-word-endings (dictionary word word-len)
384 (declare (simple-string word)
385 (fixnum word-len))
386 (prog* ((char-idx (1- word-len))
387 (char (schar word char-idx))
388 (rooting-buffer *rooting-buffer*)
389 flag-mask)
390 (declare (simple-string rooting-buffer)
391 (fixnum char-idx))
392 (case char
393 (#\S (go S)) ;This covers over half of the possible endings
394 ;by branching off the second to last character
395 ;to other flag states that have plural endings.
396 (#\R (setf flag-mask +R-mask+) ;"er" and "ier"
397 (go D-R-Z-FLAG))
398 (#\T (go T-FLAG)) ;"est" and "iest"
399 (#\D (setf flag-mask +D-mask+) ;"ed" and "ied"
400 (go D-R-Z-FLAG))
401 (#\H (go H-FLAG)) ;"th" and "ieth"
402 (#\N (setf flag-mask +N-mask+) ;"ion", "ication", and "en"
403 (go N-X-FLAG))
404 (#\G (setf flag-mask +G-mask+) ;"ing"
405 (go G-J-FLAG))
406 (#\Y (go Y-FLAG)) ;"ly"
407 (#\E (go V-FLAG))) ;"ive"
408 (return nil)
409
410 S
411 (setf char-idx (1- char-idx))
412 (setf char (schar word char-idx))
413 (if (char= char #\Y)
414 (if (set-member-p (schar word (1- char-idx)) *aeiou*)
415 (try-root dictionary word (1+ char-idx) +S-mask+)
416 (return nil))
417 (if (not (set-member-p char *sxzh*))
418 (try-root dictionary word (1+ char-idx) +S-mask+)))
419 (case char
420 (#\E (go S-FLAG)) ;"es" and "ies"
421 (#\R (setf flag-mask +Z-mask+) ;"ers" and "iers"
422 (go D-R-Z-FLAG))
423 (#\G (setf flag-mask +J-mask+) ;"ings"
424 (go G-J-FLAG))
425 (#\S (go P-FLAG)) ;"ness" and "iness"
426 (#\N (setf flag-mask +X-mask+) ;"ions", "ications", and "ens"
427 (go N-X-FLAG))
428 (#\' (try-root dictionary word char-idx +M-mask+)))
429 (return nil)
430
431 S-FLAG
432 (setf char-idx (1- char-idx))
433 (setf char (schar word char-idx))
434 (if (set-member-p char *sxzh*)
435 (try-root dictionary word (1+ char-idx) +S-mask+))
436 (if (and (char= char #\I)
437 (not (set-member-p (schar word (1- char-idx)) *aeiou*)))
438 (try-modified-root dictionary word rooting-buffer (1+ char-idx)
439 +S-mask+ char-idx #\Y))
440 (return nil)
441
442 D-R-Z-FLAG
443 (if (char/= (schar word (1- char-idx)) #\E) (return nil))
444 (try-root dictionary word char-idx flag-mask)
445 (if (<= (setf char-idx (- char-idx 2)) 0) (return nil))
446 (setf char (schar word char-idx))
447 (if (char= char #\Y)
448 (if (set-member-p (schar word (1- char-idx)) *aeiou*)
449 (try-root dictionary word (1+ char-idx) flag-mask)
450 (return nil))
451 (if (char/= (schar word char-idx) #\E)
452 (try-root dictionary word (1+ char-idx) flag-mask)))
453 (if (and (char= char #\I)
454 (not (set-member-p (schar word (1- char-idx)) *aeiou*)))
455 (try-modified-root dictionary word rooting-buffer (1+ char-idx)
456 flag-mask char-idx #\Y))
457 (return nil)
458
459 P-FLAG
460 (if (or (char/= (schar word (1- char-idx)) #\E)
461 (char/= (schar word (- char-idx 2)) #\N))
462 (return nil))
463 (if (<= (setf char-idx (- char-idx 3)) 0) (return nil))
464 (setf char (schar word char-idx))
465 (if (char= char #\Y)
466 (if (set-member-p (schar word (1- char-idx)) *aeiou*)
467 (try-root dictionary word (1+ char-idx) +P-mask+)
468 (return nil)))
469 (try-root dictionary word (1+ char-idx) +P-mask+)
470 (if (and (char= char #\I)
471 (not (set-member-p (schar word (1- char-idx)) *aeiou*)))
472 (try-modified-root dictionary word rooting-buffer (1+ char-idx)
473 +P-mask+ char-idx #\Y))
474 (return nil)
475
476 G-J-FLAG
477 (if (< char-idx 3) (return nil))
478 (setf char-idx (- char-idx 2))
479 (setf char (schar word char-idx))
480 (if (or (char/= char #\I) (char/= (schar word (1+ char-idx)) #\N))
481 (return nil))
482 (if (char/= (schar word (1- char-idx)) #\E)
483 (try-root dictionary word char-idx flag-mask))
484 (try-modified-root dictionary word rooting-buffer (1+ char-idx)
485 flag-mask char-idx #\E)
486 (return nil)
487
488 N-X-FLAG
489 (setf char-idx (1- char-idx))
490 (setf char (schar word char-idx))
491 (cond ((char= char #\E)
492 (setf char (schar word (1- char-idx)))
493 (if (and (char/= char #\Y) (char/= char #\E))
494 (try-root dictionary word char-idx flag-mask))
495 (return nil))
496 ((char= char #\O)
497 (if (char= (schar word (1- char-idx)) #\I)
498 (try-modified-root dictionary word rooting-buffer char-idx
499 flag-mask (1- char-idx) #\E)
500 (return nil))
501 (if (< char-idx 5) (return nil))
502 (if (or (char/= (schar word (- char-idx 2)) #\T)
503 (char/= (schar word (- char-idx 3)) #\A)
504 (char/= (schar word (- char-idx 4)) #\C)
505 (char/= (schar word (- char-idx 5)) #\I))
506 (return nil)
507 (setf char-idx (- char-idx 4)))
508 (try-modified-root dictionary word rooting-buffer char-idx
509 flag-mask (1- char-idx) #\Y))
510 (t (return nil)))
511
512 T-FLAG
513 (if (or (char/= (schar word (1- char-idx)) #\S)
514 (char/= (schar word (- char-idx 2)) #\E))
515 (return nil)
516 (setf char-idx (1- char-idx)))
517 (try-root dictionary word char-idx +T-mask+)
518 (if (<= (setf char-idx (- char-idx 2)) 0) (return nil))
519 (setf char (schar word char-idx))
520 (if (char= char #\Y)
521 (if (set-member-p (schar word (1- char-idx)) *aeiou*)
522 (try-root dictionary word (1+ char-idx) +T-mask+)
523 (return nil))
524 (if (char/= (schar word char-idx) #\E)
525 (try-root dictionary word (1+ char-idx) +T-mask+)))
526 (if (and (char= char #\I)
527 (not (set-member-p (schar word (1- char-idx)) *aeiou*)))
528 (try-modified-root dictionary word rooting-buffer (1+ char-idx)
529 +T-mask+ char-idx #\Y))
530 (return nil)
531
532 H-FLAG
533 (setf char-idx (1- char-idx))
534 (setf char (schar word char-idx))
535 (if (char/= char #\T) (return nil))
536 (if (char/= (schar word (1- char-idx)) #\Y)
537 (try-root dictionary word char-idx +H-mask+))
538 (if (and (char= (schar word (1- char-idx)) #\E)
539 (char= (schar word (- char-idx 2)) #\I))
540 (try-modified-root dictionary word rooting-buffer (1- char-idx)
541 +H-mask+ (- char-idx 2) #\Y))
542 (return nil)
543
544 Y-FLAG
545 (setf char-idx (1- char-idx))
546 (setf char (schar word char-idx))
547 (if (char= char #\L)
548 (try-root dictionary word char-idx +Y-mask+))
549 (return nil)
550
551 V-FLAG
552 (setf char-idx (- char-idx 2))
553 (setf char (schar word char-idx))
554 (if (or (char/= char #\I) (char/= (schar word (1+ char-idx)) #\V))
555 (return nil))
556 (if (char/= (schar word (1- char-idx)) #\E)
557 (try-root dictionary word char-idx +V-mask+))
558 (try-modified-root dictionary word rooting-buffer (1+ char-idx)
559 +V-mask+ char-idx #\E)
560 (return nil)))
561
562
563
564;;; DESCRIPTOR-FLAG returns t or nil based on whether the flag is on.
565;;; From the diagram at the beginning of the file, we see that the flags
566;;; are stored two words off of the first word in the descriptor unit for
567;;; an entry.
568;;;
569;;; Note: modified for new descriptor scheme
570(defun descriptor-flag (dictionary descriptor flag-mask)
571 (not (zerop
572 (the fixnum
573 (logand
574 (the fixnum (desc-flags (descriptor-ref dictionary descriptor)))
575 (the fixnum flag-mask))))))
576
577
578
579;;;; Looking up Trials
580
581;;; these functions used to be macros
582(declaim (inline spell-string= found-entry-p))
583
584(defun spell-string= (string1 string2 end1 start2)
585 (string= string1 string2
586 :end1 end1
587 :start2 start2
588 :end2 (+ start2 end1)))
589
590;;; FOUND-ENTRY-P determines if entry is what is described at idx.
591;;; Hash-and-length is 16 bits that look just like the first word of any
592;;; entry's descriptor unit (see diagram at the beginning of the file). If
593;;; the word stored at idx and entry have the same hash bits and length,
594;;; then we compare characters to see if they are the same.
595;;;
596(defun found-entry-p (dictionary idx entry entry-len hash)
597 (let ((desc (descriptor-ref dictionary idx)))
598 (if (and (= (desc-hash-code desc) hash)
599 (= (desc-length desc) entry-len))
600 hash
601 (spell-string= entry (string-table dictionary) entry-len
602 (desc-string-index desc)))))
603
604(eval-when (:compile-toplevel :execute)
605
606(defmacro hash2-loop ((location-var contents-var)
607 dictionary loc hash zero-contents-form
608 &optional body-form (for-insertion-p nil))
609 (let ((incr (gensym)))
610 `(let* ((,incr (hash-increment ,hash +new-dictionary-size+))
611 (,location-var ,loc)
612 (,contents-var 0))
613 (declare (fixnum ,location-var ,contents-var ,incr))
614 (loop (setf ,location-var
615 (rem (+ ,location-var ,incr) (the fixnum +new-dictionary-size+)))
616 (setf ,contents-var (desc-table-ref ,dictionary ,location-var))
617 (if (zerop ,contents-var) (return ,zero-contents-form))
618 ,@(if for-insertion-p
619 `((if (= ,contents-var spell-deleted-entry)
620 (return ,zero-contents-form))))
621 (if (= ,location-var ,loc) (return nil))
622 ,@(if body-form `(,body-form))))))
623
624) ;eval-when
625
626
627;;; LOOKUP-ENTRY returns the index of the first element of entry's
628;;; descriptor unit on success, otherwise nil.
629;;;
630(defun lookup-entry (dictionary entry &optional length)
631 (declare (simple-string entry))
632 (let* ((entry-length (or length (length entry)))
633 (hash (string-hash entry entry-length))
634 (loc (rem hash (the fixnum +new-dictionary-size+)))
635 (loc-contents (desc-table-ref dictionary loc)))
636 (declare (fixnum entry-length hash loc))
637 (cond ((zerop loc-contents) nil)
638 ((found-entry-p dictionary loc-contents entry entry-length hash)
639 loc-contents)
640 (t
641 (hash2-loop (loop-loc loc-contents)
642 dictionary loc hash
643 nil
644 (if (found-entry-p dictionary loc-contents entry
645 entry-length hash)
646 (return loc-contents)))))))
647
648
Note: See TracBrowser for help on using the repository browser.