source: branches/lispworks/disk-page-hash.lisp@ 31

Last change on this file since 31 was 7, checked in by Gail Zacharias, 17 years ago

Credit for Anvita

  • Property svn:eol-style set to native
File size: 21.2 KB
Line 
1;;;-*- Mode: Lisp; Package: WOOD -*-
2
3;;;;;;;;;;;;;;;;;;;;;;;;;;
4;;
5;; disk-page-hash.lisp
6;; A simple and very fast hashing mechanism for disk pages
7;;
8;; Portions Copyright © 2006 Clozure Associates and Anvita eReference (www.Anvita.info)
9;; Copyright © 1996-1999 Digitool, Inc.
10;; Copyright © 1992-1995 Apple Computer, Inc.
11;; All rights reserved.
12;; Permission is given to use, copy, and modify this software provided
13;; that Digitool is given credit in all derivative works.
14;; This software is provided "as is". Digitool makes no warranty or
15;; representation, either express or implied, with respect to this software,
16;; its quality, accuracy, merchantability, or fitness for a particular
17;; purpose.
18;;
19;; Entry points are: make-disk-page-hash, disk-page-gethash,
20;; (setf disk-page-gethash), disk-page-remhash, disk-page-maphash.
21;; They are similar to the Common Lisp hash table functions,
22;; except the table must have integer keys (fixnums if
23;; :wood-fixnum-addresses is on *features* when this file is compiled).
24;;
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;
27;;
28;; Modification History
29;;
30;; 02/01/06 gz LispWorks port
31;; 01/10/00 akh moved (pushnew :wood-fixnum-addresses *features*) to block-io-mcl
32;; -------- 0.96
33;; -------- 0.95
34;; -------- 0.94
35;; 03/21/96 bill (make-array ... :INITIAL-ELEMENT NIL) in make-disk-page-hash-table-vector
36;; -------- 0.93
37;; 05/31/95 bill New file
38;;
39
40#|
41Algorithm notes.
42
43This uses the same basic Knuth algorithm as MCL's hash table implementation.
44Hashing is into a vector of size 2**n. Each entry has either a key, a
45deleted marker, or an empty marker. The first hash probe is a mask with
46the key. If that hits the looked-for key, we're done. If it hits an empty
47marker, we're done and the desired key is not in the table. If it hits
48a deleted marker or a different key, we pick a secondary key from the
49*secondary-keys* table. Then add the secondary key, modding with the table
50length until you find the key you're looking for or an empty marker.
51Since all the secondary keys are odd primes (relatively prime to the table
52size), this is guaranteed to hit every element of the vector.
53
54A sequence of insertions and deletions can leave a table that has no empty
55markers. This makes gethash for a key that isn't in the table take a long time.
56Hence, when this condition is detected, the table is rehashed to get rid of
57the deleted markers. The rehashing algorithm is the library card catalog
58algorithm. You keep a counter of which card drawer you last started with.
59pull the drawer out of that slot and increment the counter. Put the drawer
60in your hand where it goes. If this causes you to pull out another drawer,
61then put that one where it goes. Eventually, you'll put a drawer in an empty
62slot and you can go back to the counter's slot. Continue until the counter
63is greater than the number of slots.
64
65|#
66
67(in-package :wood)
68
69#| ;; moved to block-io-mcl
70; Assume fixnum addresses.
71; Comment out this form to compile Wood for files larger than 256 megs.
72(eval-when (:compile-toplevel :execute :load-toplevel)
73 (pushnew :wood-fixnum-addresses *features*))
74|#
75
76(defstruct (disk-page-hash (:constructor cons-disk-page-hash ())
77 (:print-function print-disk-page-hash))
78 vector ; Where the data is stored.
79 vector-length ; (length vector) - a power of 2
80 size ; number of entries that will fit
81 count ; number of entries currently in stored
82 mask ; (1- (ash vector-length -1))
83 shift ; (integer-length mask)
84 secondary-mask ; mask for length of *secondary-keys* shifted up by shift
85 (cache-address nil) ; adderss of last reference
86 (cache-value nil) ; value of last reference
87 (cache-index nil) ; vector index of last reference
88 page-size ; The page size of the disk-cache using this hash table
89 page-size-shift ; (integer-length (1- page-size))
90 bit-vector) ; for rehashing. Actually an (unsigned-byte 8) vector (faster).
91
92(defun print-disk-page-hash (hash stream level)
93 (declare (ignore level))
94 (print-unreadable-object (hash stream :identity t :type t)
95 (format stream "~d/~d"
96 (disk-page-hash-count hash)
97 (disk-page-hash-size hash))))
98
99(defconstant *secondary-keys*
100 (coerce (mapcar #'(lambda (x) (+ x x)) '(3 5 7 11 13 17 19 23)) 'vector))
101
102(defconstant *secondary-keys-length* (length *secondary-keys*))
103(defconstant *secondary-keys-mask* (1- *secondary-keys-length*))
104
105(assert (eql *secondary-keys-length*
106 (expt 2 (integer-length (1- *secondary-keys-length*)))))
107
108(defconstant *no-key-marker* nil)
109(defconstant *deleted-key-marker* :deleted)
110
111(defparameter *minimum-size*
112 (expt 2 (1- (integer-length (apply 'max (coerce *secondary-keys* 'list))))))
113
114; Not just the default; it's not a parameter.
115(defparameter *default-rehash-threshold* 0.85)
116
117(defun make-disk-page-hash-table-vector (count &optional (rehash-threshold *default-rehash-threshold*))
118 (let* ((nominal-count (max *minimum-size*
119 (1+ count)
120 (ceiling count rehash-threshold)))
121 (shift (integer-length (1- nominal-count)))
122 (real-count (expt 2 shift)))
123 (values
124 (make-array (* 2 real-count) :initial-element nil)
125 real-count
126 shift)))
127
128(defun make-disk-page-hash (&key (size 1) (page-size 1))
129 (init-disk-page-hash (cons-disk-page-hash) size page-size))
130
131(defun init-disk-page-hash (hash count page-size)
132 (multiple-value-bind (vector real-count shift) (make-disk-page-hash-table-vector count)
133 (let ((size (truncate (* real-count *default-rehash-threshold*))))
134 (when (eql size real-count)
135 (decf size))
136 (setf (disk-page-hash-vector hash) vector
137 (disk-page-hash-vector-length hash) (length vector)
138 (disk-page-hash-size hash) size
139 (disk-page-hash-count hash) 0
140 (disk-page-hash-mask hash) (1- real-count)
141 (disk-page-hash-shift hash) shift
142 (disk-page-hash-secondary-mask hash) (ash *secondary-keys-mask* shift)
143 (disk-page-hash-cache-address hash) nil
144 (disk-page-hash-cache-value hash) nil
145 (disk-page-hash-cache-index hash) nil
146 (disk-page-hash-page-size hash) page-size
147 (disk-page-hash-page-size-shift hash) (integer-length (1- page-size))
148 (disk-page-hash-bit-vector hash) nil)))
149 hash)
150
151#+wood-fixnum-addresses
152(defun-inline address-iasr (count address)
153 (declare (fixnum count address))
154 (the fixnum (%iasr count address)))
155
156#-wood-fixnum-addresses
157(defun-inline address-iasr (count address)
158 (declare (fixnum count)
159 (optimize (speed 3) (safety 0) #+LispWorks (float 0)))
160 #-LispWorks
161 (the fixnum (ash address (the fixnum (- 0 count))))
162 #+LispWorks
163 (the fixnum (sys:int32-to-integer (sys:int32>> (the integer address) count))))
164
165
166; I wanted this to be an inlined function, but MCL's compiler wouldn't inline the knowledge
167; that address was a fixnum.
168(defmacro %disk-page-gethash-macro (address hash &optional fixnum-address?)
169 `(locally (declare (optimize (speed 3) (safety 0) #+LispWorks (float 0))
170 ,@(and fixnum-address? '((type fixnum address))))
171 (if (eql ,address (disk-page-hash-cache-address ,hash))
172 (disk-page-hash-cache-value ,hash)
173 (let* ((page-number ,(if fixnum-address?
174 `(%iasr (disk-page-hash-page-size-shift ,hash) ,address)
175 `(address-iasr (disk-page-hash-page-size-shift ,hash) ,address)))
176 (hash-code (logand page-number (the fixnum (disk-page-hash-mask ,hash))))
177 (index (+ hash-code hash-code))
178 (vector (disk-page-hash-vector ,hash))
179 (probe (svref vector index)))
180 (declare (fixnum hash-code index page-number)
181 (type simple-vector vector))
182 (cond ((eql probe ,address) (aref vector (the fixnum (1+ index))))
183 ((eq probe *no-key-marker*) nil)
184 (t (let ((secondary-key (aref *secondary-keys*
185 (%iasr (disk-page-hash-shift ,hash)
186 (logand page-number (the fixnum (disk-page-hash-secondary-mask ,hash))))))
187 (vector-length (disk-page-hash-vector-length ,hash))
188 (original-index index))
189 (declare (fixnum secondary-key vector-length original-index))
190 (loop
191 (incf index secondary-key)
192 (when (>= index vector-length)
193 (decf index vector-length))
194 (when (eql index original-index)
195 (return nil))
196 (let ((probe (aref vector index)))
197 (when (eql probe ,address)
198 (let ((value (aref vector (the fixnum (1+ index)))))
199 (setf (disk-page-hash-cache-address ,hash) ,address
200 (disk-page-hash-cache-value ,hash) value
201 (disk-page-hash-cache-index ,hash) index)
202 (return value)))
203 (when (eq probe *no-key-marker*)
204 (return nil)))))))))))
205
206#|
207;(disassemble #'fixnum-disk-page-gethash)
208(defun fixnum-disk-page-gethash (address hash)
209 (declare (type fixnum address))
210 (declare (optimize (speed 3) (safety 0) #+LispWorks (float 0)))
211 (%disk-page-gethash-macro address hash t))
212|#
213
214; This is one of WOOD's most-called functions.
215; It's important that it be as fast as possible.
216(defun disk-page-gethash (address hash #+LispWorks &optional #+LispWorks ignore)
217 #+LispWorks (declare (ignore ignore)) ;; see def-accessor for explanation.
218 (declare (optimize (speed 3) (safety 0) #+LispWorks (float 0)))
219 ; Assume if it's non-null that it's of the right type since
220 ; type check takes too long (unless unlined LAP?).
221 ; Need to check for null since disk cache inspectors can remain open
222 ; after their disk cache has been closed.
223 (unless hash
224 (error "Null hash table."))
225 (if #+wood-fixnum-addresses t #-wood-fixnum-addresses (fixnump address)
226 (locally (declare (fixnum address))
227 (%disk-page-gethash-macro address hash t))
228 (%disk-page-gethash-macro address hash)))
229
230(defun (setf disk-page-gethash) (value address hash &optional deleting?)
231 (declare (optimize (speed 3) (safety 0) #+LispWorks (float 0)))
232 #+wood-fixnum-addresses (declare (fixnum address))
233 (unless (typep hash 'disk-page-hash)
234 (setq hash (require-type hash 'disk-page-hash)))
235 (let ((vector (disk-page-hash-vector hash)))
236 (if (eql address (disk-page-hash-cache-address hash))
237 (let ((index (disk-page-hash-cache-index hash)))
238 (if deleting?
239 (let ((vector (disk-page-hash-vector hash)))
240 (setf (disk-page-hash-cache-address hash) nil ;
241 (disk-page-hash-cache-value hash) nil
242 (disk-page-hash-cache-index hash) nil
243 (aref vector index) *deleted-key-marker*
244 (aref vector (the fixnum (1+ index))) nil)
245 (decf (the fixnum (disk-page-hash-count hash)))
246 t)
247 (setf (disk-page-hash-cache-value hash) value
248 (aref vector (1+ index)) value)))
249 (let* ((page-size-shift (disk-page-hash-page-size-shift hash))
250 (page-number (address-iasr page-size-shift address))
251 (hash-code (logand page-number (the fixnum (disk-page-hash-mask hash))))
252 (index (* 2 hash-code))
253 (probe (svref vector index))
254 (new-key? (not deleting?)))
255 (declare (fixnum page-size-shift hash-code index))
256 (or (when (eql probe address)
257 (setq new-key? nil)
258 t)
259 (eql probe *no-key-marker*)
260 (let ((secondary-key (aref *secondary-keys*
261 (%iasr (disk-page-hash-shift hash)
262 (logand page-number (the fixnum (disk-page-hash-secondary-mask hash))))))
263 (vector-length (length vector))
264 (first-deletion nil)
265 (original-index index))
266 (declare (fixnum secondary-key vector-length original-index))
267 (loop
268 (incf index secondary-key)
269 (when (>= index vector-length)
270 (decf index vector-length))
271 (let ((probe (aref vector index)))
272 (when (eql probe address)
273 (setq new-key? nil)
274 (return t))
275 (when (and (not deleting?)
276 (eql index original-index)
277 (< (disk-page-hash-count hash) (disk-page-hash-size hash)))
278 (incf (disk-page-hash-count hash))
279 (return-from disk-page-gethash
280 (disk-page-rehash hash address value)))
281 (when (eql probe *no-key-marker*)
282 (when first-deletion
283 (setq index first-deletion))
284 (return t))
285 (when (eql probe *deleted-key-marker*)
286 (unless first-deletion
287 (setq first-deletion index)))))))
288 (when new-key?
289 (let ((count (disk-page-hash-count hash)))
290 (declare (fixnum count))
291 (if (>= count (disk-page-hash-size hash))
292 (return-from disk-page-gethash (grow-disk-page-hash hash address value))
293 (setf (disk-page-hash-count hash) (the fixnum (1+ count))))))
294 (if deleting?
295 (when (integerp (aref vector index))
296 (decf (disk-page-hash-count hash))
297 (setf (disk-page-hash-cache-address hash) nil
298 (disk-page-hash-cache-value hash) nil
299 (disk-page-hash-cache-index hash) nil
300 (aref vector index) *deleted-key-marker*
301 (aref vector (the fixnum (1+ index))) nil)
302 t)
303 (setf (disk-page-hash-cache-address hash) address
304 (disk-page-hash-cache-value hash) value
305 (disk-page-hash-cache-index hash) index
306 (aref vector index) address
307 (aref vector (the fixnum (1+ index))) value))))))
308
309(defun disk-page-remhash (address hash)
310 (setf (disk-page-gethash address hash t) nil))
311
312(defun disk-page-maphash (function hash)
313 (disk-page-map-vector function (disk-page-hash-vector hash)))
314
315(defun disk-page-map-vector (function vector)
316 (let ((index 0)
317 (length (length vector)))
318 (declare (fixnum index length))
319 (loop
320 (let ((key (%svref vector index))
321 (value (%svref vector (incf index))))
322 (incf index)
323 (unless (or (eql key *no-key-marker*) (eql key *deleted-key-marker*))
324 (funcall function key value))
325 (when (>= index length)
326 (return))))))
327
328(defun grow-disk-page-hash (hash address value)
329 (let* ((vector (disk-page-hash-vector hash))
330 (mapper #'(lambda (key value)
331 (setf (disk-page-gethash key hash) value))))
332 (declare (dynamic-extent mapper))
333 (init-disk-page-hash hash
334 (* 2 (disk-page-hash-size hash))
335 (disk-page-hash-page-size hash))
336 (disk-page-map-vector mapper vector)
337 (setf (disk-page-gethash address hash) value)))
338
339; Rehash to get rid of deleted markers. Insert address/value pair
340; This is called when the vector has no empty slots, all are filled
341; with data or delted key markers. In that state a failing gethash
342; takes a long time, so we get rid of the delted markers to speed it up.
343(defun disk-page-rehash (hash address value)
344 (declare (optimize (speed 3) (safety 0) #+LispWorks (float 0)))
345 (locally
346 (declare (optimize (speed 3) (safety 0)))
347 (setf (disk-page-hash-cache-address hash) *no-key-marker*
348 (disk-page-hash-cache-value hash) nil
349 (disk-page-hash-cache-index hash) nil)
350 (let* ((vector (disk-page-hash-vector hash))
351 (bits (disk-page-hash-bit-vector hash))
352 (vector-length (disk-page-hash-vector-length hash))
353 (page-size-shift (disk-page-hash-page-size-shift hash))
354 (mask (disk-page-hash-mask hash))
355 (shift (disk-page-hash-shift hash))
356 (secondary-mask (disk-page-hash-secondary-mask hash))
357 (loop-index -2)
358 (loop-index+1 -1)
359 (original-value value))
360 (declare (type simple-vector vector)
361 (fixnum vector-length page-size-shift mask shift secondary-mask loop-index loop-index+1))
362 (flet ((bit-ref (bits index)
363 (declare (type (simple-array (unsigned-byte 8) (*)) bits)
364 (optimize (speed 3)(safety 0)))
365 (aref bits index))
366 ((setf bit-ref) (v bits index)
367 (declare (type (simple-array (unsigned-byte 8) (*)) bits)
368 (optimize (speed 3)(safety 0)))
369 (setf (aref bits index) v)))
370 (declare (inline bit-ref (setf bit-ref)))
371 (if (or (null bits) (< (length bits) vector-length))
372 (setq bits
373 ; not really a bit vector because that's too slow
374 (setf (disk-page-hash-bit-vector hash)
375 (make-array vector-length :element-type '(unsigned-byte 8) :initial-element 0)))
376 (dotimes (i vector-length) (setf (bit-ref bits i) 0)))
377 (loop
378 (unless address
379 (loop
380 (incf loop-index 2)
381 (incf loop-index+1 2)
382 ;(print-db loop-index)
383 (when (>= loop-index vector-length)
384 (return-from disk-page-rehash original-value))
385 (when (eql 0 (bit-ref bits loop-index))
386 (setq address (svref vector loop-index))
387 ;(print-db address)
388 (cond ((eql address *no-key-marker*))
389 ((eql address *deleted-key-marker*)
390 (setf (svref vector loop-index) *no-key-marker*
391 (svref vector loop-index+1) nil))
392 (t (setq value (svref vector loop-index+1))
393 (setf (svref vector loop-index) *no-key-marker*
394 (svref vector loop-index+1) nil)
395 (return))))))
396 (let* ((integer-address address)
397 (page-number (address-iasr page-size-shift integer-address))
398 (hash-code (logand page-number mask))
399 (index (* 2 hash-code)))
400 #+wood-fixnum-addresses (declare (fixnum integer-address page-number))
401 (declare (fixnum hash-code index))
402 (flet ((insert-p (probe)
403 (let ((index+1 (1+ index)))
404 (declare (fixnum index+1))
405 (cond ((or (eql probe *no-key-marker*) (eql probe *deleted-key-marker*))
406 (setf (svref vector index) address
407 (svref vector index+1) value)
408 (setq address nil value nil)
409 (setf (bit-ref bits index) 1))
410 ((eql 0 (bit-ref bits index))
411 (setf (svref vector index) address
412 address probe)
413 (rotatef value (svref vector index+1))
414 (setf (bit-ref bits index) 1))
415 (t nil)))))
416 (declare (dynamic-extent #'insert-p))
417 (unless (insert-p (svref vector index))
418 (let ((secondary-key (aref *secondary-keys*
419 (%iasr shift (logand page-number secondary-mask)))))
420 (declare (fixnum secondary-key))
421 (loop
422 (incf index secondary-key)
423 (when (>= index vector-length)
424 (decf index vector-length))
425 ;(print-db index)
426 (when (insert-p (svref vector index)) (return))))))))))))
427
428; For testing
429#|
430(advise disk-page-rehash
431 (destructuring-bind (hash address value) arglist
432 (prog1
433 (:do-it)
434 (let ((was (disk-page-gethash address hash)))
435 (unless (eq value was)
436 (error "address: ~s, sb: ~s, was: ~s" address value was)))
437 (let ((mapper #'(lambda (a v)
438 (let ((was (disk-page-gethash a hash)))
439 (unless (eq was v)
440 (error "Address: ~s~%, sb: ~s~%, was: ~s"
441 a v was))
442 (unless (eql a (disk-page-address v))
443 (error "Address: ~s for ~s" a v))))))
444 (declare (dynamic-extent mapper))
445 (disk-page-maphash mapper hash))))
446 :when :around
447 :name :debug)
448
449(advise (setf disk-page-gethash)
450 (destructuring-bind (value address hash &optional delete?) arglist
451 (unless (or delete? (eql address (disk-page-address value)))
452 (error "Address: ~s, value: ~s" address value))
453 (:do-it)
454 (unless (eq (disk-page-gethash address hash) value)
455 (error "Bad value")))
456 :when :around
457 :name :debug)
458|#
459;;; 1 6/02/95 bill 1.11d040
460;;; 2 8/01/95 bill 1.11d065
Note: See TracBrowser for help on using the repository browser.