source: trunk/disk-page-hash.lisp @ 3

Revision 3, 20.4 KB checked in by gz, 9 years ago (diff)

Recovered version 0.961 from Sheldon Ball <s.ball@…>

  • Property svn:eol-style set to native
Line 
1;;;-*- Mode: Lisp; Package: WOOD -*-
2
3;;;;;;;;;;;;;;;;;;;;;;;;;;
4;;
5;; disk-page-hash.lisp
6;; A simple and very fast hashing mechanism for disk pages
7;;
8;; Copyright © 1996-1999 Digitool, Inc.
9;; Copyright © 1992-1995 Apple Computer, Inc.
10;; All rights reserved.
11;; Permission is given to use, copy, and modify this software provided
12;; that Digitool is given credit in all derivative works.
13;; This software is provided "as is". Digitool makes no warranty or
14;; representation, either express or implied, with respect to this software,
15;; its quality, accuracy, merchantability, or fitness for a particular
16;; purpose.
17;;
18;; Entry points are: make-disk-page-hash, disk-page-gethash,
19;; (setf disk-page-gethash), disk-page-remhash, disk-page-maphash.
20;; They are similar to the Common Lisp hash table functions,
21;; except the table must have integer keys (fixnums if
22;; :wood-fixnum-addresses is on *features* when this file is compiled).
23;;
24
25;;;;;;;;;;;;;;;;;;;;;;;;;;
26;;
27;; Modification History
28;;
29;; 01/10/00 akh moved (pushnew :wood-fixnum-addresses *features*) to block-io-mcl
30;; -------- 0.96
31;; -------- 0.95
32;; -------- 0.94
33;; 03/21/96 bill (make-array ... :INITIAL-ELEMENT NIL) in make-disk-page-hash-table-vector
34;; -------- 0.93
35;; 05/31/95 bill New file
36;;
37
38#|
39Algorithm notes.
40
41This uses the same basic Knuth algorithm as MCL's hash table implementation.
42Hashing is into a vector of size 2**n. Each entry has either a key, a
43deleted marker, or an empty marker. The first hash probe is a mask with
44the key. If that hits the looked-for key, we're done. If it hits an empty
45marker, we're done and the desired key is not in the table. If it hits
46a deleted marker or a different key, we pick a secondary key from the
47*secondary-keys* table. Then add the secondary key, modding with the table
48length until you find the key you're looking for or an empty marker.
49Since all the secondary keys are odd primes (relatively prime to the table
50size), this is guaranteed to hit every element of the vector.
51
52A sequence of insertions and deletions can leave a table that has no empty
53markers. This makes gethash for a key that isn't in the table take a long time.
54Hence, when this condition is detected, the table is rehashed to get rid of
55the deleted markers. The rehashing algorithm is the library card catalog
56algorithm. You keep a counter of which card drawer you last started with.
57pull the drawer out of that slot and increment the counter. Put the drawer
58in your hand where it goes. If this causes you to pull out another drawer,
59then put that one where it goes. Eventually, you'll put a drawer in an empty
60slot and you can go back to the counter's slot. Continue until the counter
61is greater than the number of slots.
62
63|#
64
65(in-package :wood)
66
67#| ;; moved to block-io-mcl
68; Assume fixnum addresses.
69; Comment out this form to compile Wood for files larger than 256 megs.
70(eval-when (:compile-toplevel :execute :load-toplevel)
71  (pushnew :wood-fixnum-addresses *features*))
72|#
73
74(defstruct (disk-page-hash (:constructor cons-disk-page-hash ())
75                              (:print-function print-disk-page-hash))
76  vector                 ; Where the data is stored.
77  vector-length          ; (length vector) - a power of 2
78  size                   ; number of entries that will fit
79  count                  ; number of entries currently in stored
80  mask                   ; (1- (ash vector-length -1))
81  shift                  ; (integer-length mask)
82  secondary-mask         ; mask for length of *secondary-keys* shifted up by shift
83  (cache-address nil)    ; adderss of last reference
84  (cache-value nil)      ; value of last reference
85  (cache-index nil)      ; vector index of last reference
86  page-size              ; The page size of the disk-cache using this hash table
87  page-size-shift        ; (integer-length (1- page-size))
88  bit-vector)            ; for rehashing. Actually an (unsigned-byte 8) vector (faster).
89
90(defun print-disk-page-hash (hash stream level)
91  (declare (ignore level))
92  (print-unreadable-object (hash stream :identity t :type t)
93    (format stream "~d/~d"
94            (disk-page-hash-count hash)
95            (disk-page-hash-size hash))))
96
97(defconstant *secondary-keys*
98  (coerce (mapcar #'(lambda (x) (+ x x)) '(3 5 7 11 13 17 19 23)) 'vector))
99
100(defconstant *secondary-keys-length* (length *secondary-keys*))
101(defconstant *secondary-keys-mask* (1- *secondary-keys-length*))
102
103(assert (eql *secondary-keys-length*
104             (expt 2 (integer-length (1- *secondary-keys-length*)))))
105
106(defconstant *no-key-marker* nil)
107(defconstant *deleted-key-marker* :deleted)
108
109(defparameter *minimum-size*
110  (expt 2 (1- (integer-length (apply 'max (coerce *secondary-keys* 'list))))))
111
112; Not just the default; it's not a parameter.
113(defparameter *default-rehash-threshold* 0.85)
114
115(defun make-disk-page-hash-table-vector (count &optional (rehash-threshold *default-rehash-threshold*))
116  (let* ((nominal-count (max *minimum-size*
117                             (1+ count)
118                             (ceiling count rehash-threshold)))
119         (shift (integer-length (1- nominal-count)))
120         (real-count (expt 2 shift)))
121    (values
122     (make-array (* 2 real-count) :initial-element nil)
123     real-count
124     shift)))
125
126(defun make-disk-page-hash (&key (size 1) (page-size 1))
127  (init-disk-page-hash (cons-disk-page-hash) size page-size))
128
129(defun init-disk-page-hash (hash count page-size)
130  (multiple-value-bind (vector real-count shift) (make-disk-page-hash-table-vector count)
131    (let ((size (truncate (* real-count *default-rehash-threshold*))))
132      (when (eql size real-count)
133        (decf size))
134      (setf (disk-page-hash-vector hash) vector
135            (disk-page-hash-vector-length hash) (length vector)
136            (disk-page-hash-size hash) size
137            (disk-page-hash-count hash) 0
138            (disk-page-hash-mask hash) (1- real-count)
139            (disk-page-hash-shift hash) shift
140            (disk-page-hash-secondary-mask hash) (ash *secondary-keys-mask* shift)
141            (disk-page-hash-cache-address hash) nil
142            (disk-page-hash-cache-value hash) nil
143            (disk-page-hash-cache-index hash) nil
144            (disk-page-hash-page-size hash) page-size
145            (disk-page-hash-page-size-shift hash) (integer-length (1- page-size))
146            (disk-page-hash-bit-vector hash) nil)))
147  hash)
148
149(declaim (inline address-iasr))
150
151(defun address-iasr (count address &optional known-fixnum-p)
152  (declare (fixnum count))
153  #+wood-fixnum-addresses (declare (fixnum address) (ignore known-fixnum-p))
154  (if #+wood-fixnum-addresses t
155      #-wood-fixnum-addresses known-fixnum-p
156      (ccl::%iasr count address)
157      (ash address (the fixnum (- 0 count)))))
158
159; I wanted this to be an inlined function, but MCL's compiler wouldn't inline the knowledge
160; that address was a fixnum.
161(defmacro %disk-page-gethash-macro (address hash &optional fixnum-address?)
162  `(locally (declare (optimize (speed 3) (safety 0)))
163     (if (eql ,address (disk-page-hash-cache-address ,hash))
164       (disk-page-hash-cache-value ,hash)
165       (let* ((page-number (address-iasr (disk-page-hash-page-size-shift ,hash) ,address ,fixnum-address?))
166              (hash-code (logand page-number (the fixnum (disk-page-hash-mask ,hash))))
167              (index (* 2 hash-code))
168              (vector (disk-page-hash-vector ,hash))
169              (probe (svref vector index)))
170         (declare (fixnum hash-code index probe ,@(and fixnum-address? '(page-number)))
171                  (type simple-vector vector))
172         (cond ((eql probe ,address) (aref vector (the fixnum (1+ index))))
173               ((eql probe *no-key-marker*) nil)
174               (t (let ((secondary-key (aref *secondary-keys*
175                                             (ccl::%iasr (disk-page-hash-shift ,hash)
176                                                         (logand page-number (the fixnum (disk-page-hash-secondary-mask ,hash))))))
177                        (vector-length (disk-page-hash-vector-length ,hash))
178                        (original-index index))
179                    (declare (fixnum secondary-key vector-length original-index))
180                    (loop
181                      (incf index secondary-key)
182                      (when (>= index vector-length)
183                        (decf index vector-length))
184                      (when (eql index original-index)
185                        (return nil))
186                      (let ((probe (aref vector index)))
187                        (when (eql probe ,address)
188                          (let ((value (aref vector (the fixnum (1+ index)))))
189                            (setf (disk-page-hash-cache-address hash) address
190                                  (disk-page-hash-cache-value hash) value
191                                  (disk-page-hash-cache-index hash) index)
192                            (return value)))
193                        (when (eql probe *no-key-marker*)
194                          (return nil)))))))))))
195
196; This is one of WOOD's most-called functions.
197; It's important that it be as fast as possible.
198(defun disk-page-gethash (address hash)
199  (declare (optimize (speed 3) (safety 0)))
200  ; Assume if it's non-null that it's of the right type since
201  ; type check takes too long (unless unlined LAP?).
202  ; Need to check for null since disk cache inspectors can remain open
203  ; after their disk cache has been closed.
204  (unless hash
205    (error "Null hash table."))
206  (if #+wood-fixnum-addresses t
207      #-wood-fixnum-addresses (fixnump address)
208    (locally (declare (fixnum address))
209      (%disk-page-gethash-macro address hash t))
210    (%disk-page-gethash-macro address hash)))
211
212(defun (setf disk-page-gethash) (value address hash &optional deleting?)
213  #+wood-fixnum-addresses (declare (fixnum address))
214  (unless (typep hash 'disk-page-hash)
215    (setq hash (require-type hash 'disk-page-hash)))
216  (let ((vector (disk-page-hash-vector hash)))
217    (if (eql address (disk-page-hash-cache-address hash))
218      (let ((index (disk-page-hash-cache-index hash)))
219        (if deleting?
220          (let ((vector (disk-page-hash-vector hash)))
221            (setf (disk-page-hash-cache-address hash) nil
222                  (disk-page-hash-cache-value hash) nil
223                  (disk-page-hash-cache-index hash) nil
224                  (aref vector index) *deleted-key-marker*
225                  (aref vector (the fixnum (1+ index))) nil)
226            (decf (the fixnum (disk-page-hash-count hash)))
227            t)
228          (setf (disk-page-hash-cache-value hash) value
229                (aref vector (1+ index)) value)))
230      (let* ((page-size-shift (disk-page-hash-page-size-shift hash))
231             (page-number (address-iasr page-size-shift address))
232             (hash-code (logand page-number (the fixnum (disk-page-hash-mask hash))))
233             (index (* 2 hash-code))
234             (probe (svref vector index))
235             (new-key? (not deleting?)))
236        (declare (fixnum page-size-shift hash-code index))
237        (or (when (eql probe address)
238              (setq new-key? nil)
239              t)
240            (eql probe *no-key-marker*)
241            (let ((secondary-key (aref *secondary-keys*
242                                       (ccl::%iasr (disk-page-hash-shift hash)
243                                                   (logand page-number (the fixnum (disk-page-hash-secondary-mask hash))))))
244                  (vector-length (length vector))
245                  (first-deletion nil)
246                  (original-index index))
247              (declare (fixnum secondary-key vector-length original-index))
248              (loop
249                (incf index secondary-key)
250                (when (>= index vector-length)
251                  (decf index vector-length))
252                (let ((probe (aref vector index)))
253                  (when (eql probe address)
254                    (setq new-key? nil)
255                    (return t))
256                  (when (and (not deleting?)
257                             (eql index original-index)
258                             (< (disk-page-hash-count hash) (disk-page-hash-size hash)))
259                    (incf (disk-page-hash-count hash))
260                    (return-from disk-page-gethash
261                      (disk-page-rehash hash address value)))
262                  (when (eql probe *no-key-marker*)
263                    (when first-deletion
264                      (setq index first-deletion))
265                    (return t))
266                  (when (eql probe *deleted-key-marker*)
267                    (unless first-deletion
268                      (setq first-deletion index)))))))
269        (when new-key?
270          (let ((count (disk-page-hash-count hash)))
271            (declare (fixnum count))
272            (if (>= count (disk-page-hash-size hash))
273              (return-from disk-page-gethash (grow-disk-page-hash hash address value))
274              (setf (disk-page-hash-count hash) (the fixnum (1+ count))))))
275        (if deleting?
276          (when (integerp (aref vector index))
277            (decf (disk-page-hash-count hash))
278            (setf (disk-page-hash-cache-address hash) nil
279                  (disk-page-hash-cache-value hash) nil
280                  (disk-page-hash-cache-index hash) nil
281                  (aref vector index) *deleted-key-marker*
282                  (aref vector (the fixnum (1+ index))) nil)
283            t)
284          (setf (disk-page-hash-cache-address hash) address
285                (disk-page-hash-cache-value hash) value
286                (disk-page-hash-cache-index hash) index
287                (aref vector index) address
288                (aref vector (the fixnum (1+ index))) value))))))
289
290(defun disk-page-remhash (address hash)
291  (setf (disk-page-gethash address hash t) nil))
292
293(defun disk-page-maphash (function hash)
294  (disk-page-map-vector function (disk-page-hash-vector hash)))
295
296(defun disk-page-map-vector (function vector)
297  (let ((index 0)
298        (length (length vector)))
299    (declare (fixnum index length))
300    (loop
301      (let ((key (ccl::%svref vector index))
302            (value (ccl::%svref vector (incf index))))
303        (incf index)
304        (unless (or (eql key *no-key-marker*) (eql key *deleted-key-marker*))
305          (funcall function key value))
306        (when (>= index length)
307          (return))))))
308
309(defun grow-disk-page-hash (hash address value)
310  (let* ((vector (disk-page-hash-vector hash))
311         (mapper #'(lambda (key value)
312                     (setf (disk-page-gethash key hash) value))))
313    (declare (dynamic-extent mapper))
314    (init-disk-page-hash hash
315                         (* 2 (disk-page-hash-size hash))
316                         (disk-page-hash-page-size hash))
317    (disk-page-map-vector mapper vector)
318    (setf (disk-page-gethash address hash) value)))
319
320; Rehash to get rid of deleted markers. Insert address/value pair
321; This is called when the vector has no empty slots, all are filled
322; with data or delted key markers. In that state a failing gethash
323; takes a long time, so we get rid of the delted markers to speed it up.
324(defun disk-page-rehash (hash address value)
325  (locally
326    (declare (optimize (speed 3) (safety 0)))
327    (setf (disk-page-hash-cache-address hash) *no-key-marker*
328          (disk-page-hash-cache-value hash) nil
329          (disk-page-hash-cache-index hash) nil)
330    (let* ((vector (disk-page-hash-vector hash))
331           (bits (disk-page-hash-bit-vector hash))
332           (vector-length (disk-page-hash-vector-length hash))
333           (page-size-shift (disk-page-hash-page-size-shift hash))
334           (mask (disk-page-hash-mask hash))
335           (shift (disk-page-hash-shift hash))
336           (secondary-mask (disk-page-hash-secondary-mask hash))
337           (loop-index -2)
338           (loop-index+1 -1)
339           (original-value value))
340      (declare (type simple-vector vector)
341               (fixnum vector-length page-size-shift mask shift secondary-mask loop-index loop-index+1))
342      #-wood-fixnum-addresses (declare (fixnum minus-page-size-shift))
343      (flet ((bit-ref (bits index)
344               (declare (type (simple-array (unsigned-byte 8) (*)) bits)
345                        (optimize (speed 3)(safety 0)))
346               (aref bits index))
347             ((setf bit-ref) (v bits index)
348               (declare (type (simple-array (unsigned-byte 8) (*)) bits)
349                        (optimize (speed 3)(safety 0)))
350               (setf (aref bits index) v)))
351        (declare (inline bit-ref (setf bit-ref)))
352        (if (or (null bits) (< (length bits) vector-length))
353          (setq bits
354                ; not really a bit vector because that's too slow
355                (setf (disk-page-hash-bit-vector hash)
356                      (make-array vector-length :element-type '(unsigned-byte 8) :initial-element 0)))
357          (dotimes (i vector-length) (setf (bit-ref bits i) 0)))
358        (loop
359          (unless address
360            (loop
361              (incf loop-index 2)
362              (incf loop-index+1 2)
363              ;(print-db loop-index)
364              (when (>= loop-index vector-length)
365                (return-from disk-page-rehash original-value))
366              (when (eql 0 (bit-ref bits loop-index))
367                (setq address (svref vector loop-index))
368                ;(print-db address)
369                (cond ((eql address *no-key-marker*))
370                      ((eql address *deleted-key-marker*)
371                       (setf (svref vector loop-index) *no-key-marker*
372                             (svref vector loop-index+1) nil))
373                      (t (setq value (svref vector loop-index+1))
374                         (setf (svref vector loop-index) *no-key-marker*
375                               (svref vector loop-index+1) nil)
376                         (return))))))
377          (let* ((integer-address address)
378                 (page-number (address-iasr page-size-shift integer-address))
379                 (hash-code (logand page-number mask))
380                 (index (* 2 hash-code)))
381            #+wood-fixnum-addresses (declare (fixnum integer-address page-number))
382            (declare (fixnum hash-code index))
383            (flet ((insert-p (probe)
384                     (let ((index+1 (1+ index)))
385                       (declare (fixnum index+1))
386                       (cond ((or (eql probe *no-key-marker*) (eql probe *deleted-key-marker*))
387                              (setf (svref vector index) address
388                                    (svref vector index+1) value)
389                              (setq address nil value nil)
390                              (setf (bit-ref bits index) 1))
391                             ((eql 0 (bit-ref bits index))
392                              (setf (svref vector index) address
393                                    address probe)
394                              (rotatef value (svref vector index+1))
395                              (setf (bit-ref bits index) 1))
396                             (t nil)))))
397              (declare (dynamic-extent insert-p))
398              (unless (insert-p (svref vector index))
399                (let ((secondary-key (aref *secondary-keys*
400                                           (ccl::%iasr shift (logand page-number secondary-mask)))))
401                  (declare (fixnum secondary-key))
402                  (loop
403                    (incf index secondary-key)
404                    (when (>= index vector-length)
405                      (decf index vector-length))
406                    ;(print-db index)
407                    (when (insert-p (svref vector index)) (return))))))))))))
408
409; For testing
410#|
411(advise disk-page-rehash
412        (destructuring-bind (hash address value) arglist
413          (prog1
414            (:do-it)
415            (let ((was (disk-page-gethash address hash)))
416              (unless (eq value was)
417                (error "address: ~s, sb: ~s, was: ~s" address value was)))
418            (let ((mapper #'(lambda (a v)
419                              (let ((was (disk-page-gethash a hash)))
420                                (unless (eq was v)
421                                  (error "Address: ~s~%, sb: ~s~%, was: ~s"
422                                         a v was))
423                                (unless (eql a (disk-page-address v))
424                                  (error "Address: ~s for ~s" a v))))))
425              (declare (dynamic-extent mapper))
426              (disk-page-maphash mapper hash))))
427        :when :around
428        :name :debug)
429
430(advise (setf disk-page-gethash)
431        (destructuring-bind (value address hash &optional delete?) arglist
432          (unless (or delete? (eql address (disk-page-address value)))
433            (error "Address: ~s, value: ~s" address value))
434          (:do-it)
435          (unless (eq (disk-page-gethash address hash) value)
436            (error "Bad value")))
437        :when :around
438        :name :debug)
439|#
440;;;    1   6/02/95  bill         1.11d040
441;;;    2   8/01/95  bill         1.11d065
Note: See TracBrowser for help on using the repository browser.