source: branches/working-0710/ccl/lib/hash.lisp @ 7693

Last change on this file since 7693 was 7693, checked in by gb, 13 years ago

New hash-table iteration interface.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.5 KB
Line 
1;;;-*- Mode: Lisp; Package: CCL -*-
2;;;
3;;;   Copyright (C) 1994-2001 Digitool, Inc
4;;;   This file is part of OpenMCL. 
5;;;
6;;;   OpenMCL is licensed under the terms of the Lisp Lesser GNU Public
7;;;   License , known as the LLGPL and distributed with OpenMCL as the
8;;;   file "LICENSE".  The LLGPL consists of a preamble and the LGPL,
9;;;   which is distributed with OpenMCL as the file "LGPL".  Where these
10;;;   conflict, the preamble takes precedence. 
11;;;
12;;;   OpenMCL is referenced in the preamble as the "LIBRARY."
13;;;
14;;;   The LLGPL is also available online at
15;;;   http://opensource.franz.com/preamble.html
16
17;;  This is just the stuff (make-load-form, print-object) that can't be fasloaded earlier.
18
19
20;;;;;;;;;;;;;
21;;
22;; hash.lisp
23;; New hash table implementation
24
25;;;;;;;;;;;;;
26;;
27;; Things I didn't do
28;;
29;; Save the 32-bit hash code along with the key so that growing the table can
30;; avoid calling the hashing function (at least until a GC happens during growing).
31;;
32;; Maybe use Knuth's better method for hashing:
33;; find two primes N-2, N.  N is the table size.
34;; First probe is at primary = (mod (funcall (nhash.keytransF h) key) N)
35;; Secondary probes are spaced by (mod (funcall (nhash.keytransF h) key) N-2)
36;; This does a bit better scrambling of the secondary probes, but costs another divide.
37;;
38;; Rethink how finalization is reported to the user.  Maybe have a finalization function which
39;; is called with the hash table and the deleted key & value.
40
41
42;;;;;;;;;;;;;
43;;
44;; Documentation
45;;
46;; MAKE-HASH-TABLE is extended to accept a :HASH-FUNCTION keyword arg which
47;; defaults for the 4 Common Lisp defined :TEST's.  Also, any fbound symbol can
48;; be used for the :TEST argument.  The HASH-FUNCTION is a function of one
49;; argument, the key, which returns one or two values:
50;;
51;; 1) HASH-CODE
52;; 2) ADDRESSP
53;;
54;; The HASH-CODE can be any object.  If it is a relocateable object (not a
55;; fixnum, short float, or immediate) then ADDRESSP will default to :KEY
56;; and it is an error if NIL is returned for ADDRESSP.
57;;
58;; If ADDRESSP is NIL, the hashing code assumes that no addresses were used
59;; in computing the HASH-CODE.  If ADDRESSP is :KEY (which is the default
60;; if the hash function returns only one value and it is relocateable) then
61;; the hashing code assumes that only the KEY's address was used to compute
62;; the HASH-CODE.  Otherwise, it is assumed that the address of a
63;; component of the key was used to compute the HASH-CODE.
64;;
65;;
66;;
67;; Some (proposed) functions for using in user hashing functions:
68;;
69;; (HASH-CODE object)
70;;
71;; returns two values:
72;;
73;; 1) HASH-CODE
74;; 2) ADDRESSP
75;;
76;; HASH-CODE is the object transformed into a fixnum by changing its tag
77;; bits to a fixnum's tag.  ADDRESSP is true if the object was
78;; relocateable. ;;
79;;
80;; (FIXNUM-ADD o1 o2)
81;; Combines two objects additively and returns a fixnum.
82;; If the two objects are fixnums, will be the same as (+ o1 o2) except
83;; that the result can not be a bignum.
84;;
85;; (FIXNUM-MULTIPLY o1 o2)
86;; Combines two objects multiplicatively and returns a fixnum.
87;;
88;; (FIXNUM-FLOOR dividend &optional divisor)
89;; Same as Common Lisp's FLOOR function, but converts the objects into
90;; fixnums before doing the divide and returns two fixnums: quotient &
91;; remainder.
92;;
93;;;;;;;;;;;;;
94;;
95;; Implementation details.
96;;
97;; Hash table vectors have a header that the garbage collector knows
98;; about followed by alternating keys and values.  Empty slots have a
99;; key of (%UNBOUND-MARKER), deleted slots are denoted by a key of
100;; (%SLOT-UNBOUND-MARKER).
101;;
102;; Four bits in the nhash.vector.flags fixnum interact with the garbage
103;; collector.  This description uses the symbols that represent bit numbers
104;; in a fixnum.  $nhash_xxx_bit has a corresponding $nhash_lap_xxx_bit which
105;; gives the byte offset of the bit for LAP code.  The two bytes in
106;; question are at offsets $nhash.vector-weak-byte and
107;; $nhash.vector-track-keys-byte offsets from the tagged vector.
108;; The 32 bits of the fixnum at nhash.vector.flags look like:
109;;
110;;     TK0C0000 00000000 WVF00000 00000000
111;;
112;;
113;; $nhash_track_keys_bit         "T" in the diagram above
114;;                               Sign bit of the longword at $nhash.vector.flags
115;;                               or the byte at $nhash.vector-track-keys-byte.
116;;                               If set, GC tracks relocation of keys in the
117;;                               vector.
118;; $nhash_key_moved_bit          "K" in the diagram above
119;;                               Set by GC to indicate that a key moved.
120;;                               If $nhash_track_keys_bit is clear, this bit is set to
121;;                               indicate that any GC will require a rehash.
122;;                               GC never clears this bit, but may set it if
123;;                               $nhash_track_keys_bit is set.
124;; $nhash_component_address_bit  "C" in the diagram above.
125;;                               Ignored by GC.  Set to indicate that the
126;;                               address of a component of a key was used.
127;;                               Means that $nhash_track_keys_bit will
128;;                               never be set until all such keys are
129;;                               removed.
130;; $nhash_weak_bit               "W" in the diagram above
131;;                               Sign bit of the byte at $nhash.vector-weak-byte
132;;                               Set to indicate a weak hash table
133;; $nhash_weak_value_bit         "V" in the diagram above
134;;                               If clear, the table is weak on key
135;;                               If set, the table is weak on value
136;; $nhash_finalizeable_bit       "F" in the diagram above
137;;                               If set the table is finalizeable:
138;;                               If any key/value pairs are removed, they will be added to
139;;                               the nhash.vector.finalization-alist using cons cells
140;;                               from nhash.vector.free-alist
141
142(in-package "CCL")
143
144
145(eval-when (:compile-toplevel :execute)
146  (require "HASHENV" "ccl:xdump;hashenv"))
147
148(defvar *hash-table-class*
149  (progn
150;    #+sparc-target (dbg)
151    (make-built-in-class 'hash-table *istruct-class*)))
152
153(setf (type-predicate 'hash-table) 'hash-table-p)
154
155
156(defmethod print-object ((table hash-table) stream)
157  (print-unreadable-object (table stream :type t :identity t)
158    (format stream "~S ~S size ~D/~D"
159            ':test (hash-table-test table)
160            (hash-table-count table)
161            (hash-table-size table))
162    (when (readonly-hash-table-p table)
163      (format stream " (Readonly)"))))
164
165
166;;; Of course, the lisp version of this would be too slow ...
167(defun hash-table-finalization-list (hash-table)
168  (unless (hash-table-p hash-table)
169    (report-bad-arg hash-table 'hash-table))
170  (let* ((vector (nhash.vector hash-table))
171         (flags (nhash.vector.flags vector)))
172    (declare (fixnum flags))
173    (if (logbitp $nhash_finalizeable_bit flags)
174      (nhash.vector.finalization-alist vector)
175      (error "~S is not a finalizeable hash table" hash-table))))
176
177(defun (setf hash-table-finalization-list) (value hash-table)
178  (unless (hash-table-p hash-table)
179    (report-bad-arg hash-table 'hash-table))
180  (let* ((vector (nhash.vector hash-table))
181         (flags (nhash.vector.flags vector)))
182    (declare (fixnum flags))
183    (if (logbitp $nhash_finalizeable_bit flags)
184      (setf (nhash.vector.finalization-alist vector) value)
185      (error "~S is not a finalizeable hash table" hash-table))))
186
187(defsetf gethash puthash)
188
189; Returns nil, :key or :value
190(defun hash-table-weak-p (hash)
191  (unless (hash-table-p hash)
192    (setq hash (require-type hash 'hash-table)))
193  (let* ((vector (nhash.vector hash))
194         (flags (nhash.vector.flags vector)))
195    (when (logbitp $nhash_weak_bit flags)
196      (if (logbitp $nhash_weak_value_bit flags)
197        :value
198        :key))))
199
200;;; It would be pretty complicated to offer a way of doing (SETF
201;;; HASH-TABLE-WEAK-P) after the hash-table's been created, and
202;;; it's not clear that that'd be incredibly useful.
203
204
205
206;;;;;;;;;;;;;
207;;
208;; Mapping functions
209;;
210
211
212
213(defun next-hash-table-iteration (state)
214  (do* ((hash (nhti.hash-table state))
215        (index (nhti.index state) (1+ index))
216        (keys (nhti.keys state))
217        (nkeys (nhti.nkeys state))
218        (missing (cons nil nil)))
219       ((>= index nkeys)
220        (setf (nhti.index state) nkeys)
221        (values nil nil nil))
222    (declare (fixnum index nkeys)
223             (simple-vector keys)
224             (dynamic-extent missing))
225    (let* ((key (svref keys index))
226           (value (gethash key hash missing)))
227      (unless (eq value missing)
228        (setf (nhti.index state) (1+ index))
229        (return (values t key value))))))
230
231
232
233(defun maphash (function hash-table)
234  "For each entry in HASH-TABLE, call the designated two-argument function
235   on the key and value of the entry. Return NIL."
236  (with-hash-table-iterator (m hash-table)
237    (loop
238      (multiple-value-bind (found key value) (m)
239        (unless found (return))
240        (funcall function key value)))))
241
242
243
244(defmethod make-load-form ((hash hash-table) &optional env)
245  (declare (ignore env))
246  (let ((rehashF (function-name (nhash.rehashF hash)))
247        (keytransF (nhash.keytransF hash))
248        (compareF (nhash.compareF hash))
249        (vector (nhash.vector hash))
250        (private (if (nhash.owner hash) '*current-process*))
251        (count (nhash.count hash)))
252    (flet ((convert (f)
253             (if (or (fixnump f) (symbolp f))
254               `',f
255               `(symbol-function ',(function-name f)))))
256      (values
257       `(%cons-hash-table
258         nil nil nil nil ,(nhash.grow-threshold hash) ,(nhash.rehash-ratio hash) ,(nhash.rehash-size hash) ,(nhash.address-based hash) nil nil ,private)
259       `(%initialize-hash-table ,hash ',rehashF ,(convert keytransF) ,(convert compareF)
260                                ',vector ,count)))))
261
262(defun needs-rehashing (hash)
263  (%set-needs-rehashing hash))
264
265(defun %initialize-hash-table (hash rehashF keytransF compareF vector count)
266  (setf (nhash.rehashF hash) (symbol-function rehashF)
267        (nhash.keytransF hash) keytransF
268        (nhash.compareF hash) compareF
269        (nhash.vector hash) vector
270        (nhash.count hash) count)
271  (setf (nhash.find hash)
272        (case comparef
273          (0 #'eq-hash-find)
274          (-1 #'eql-hash-find)
275          (t #'general-hash-find))
276        (nhash.find-new hash)
277        (case comparef
278          (0 #'eq-hash-find-for-put)
279          (-1 #'eql-hash-find-for-put)
280          (t #'general-hash-find-for-put)))
281  (%set-needs-rehashing hash))
282
283
284;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
285;;
286;; Support for locking hash tables while fasdumping
287;;
288
289
290(defun fasl-lock-hash-table (hash-table)
291  (setq hash-table (require-type hash-table 'hash-table))
292  (without-interrupts
293   (let* ((lock (nhash.exclusion-lock hash-table)))
294     (if lock
295       (write-lock-rwlock lock)
296       (progn
297         (unless (eq (nhash.owner hash-table) *current-process*)
298           (error "Current process doesn't own hash-table ~s" hash-table))))
299     (push hash-table *fcomp-locked-hash-tables*))))
300
301(defun fasl-unlock-hash-tables ()
302  (dolist (h *fcomp-locked-hash-tables*)
303    (let* ((lock (nhash.exclusion-lock h)))
304      (if lock (unlock-rwlock lock)))))
305
306
307
308             
309
310#+not-yet
311(progn
312;;;;;;;;;;;;;
313;;
314;; Replacement for population
315;;
316(def-accessors (weak-table) %svref
317  nil                                   ; 'weak-table
318  weak-table.vector                     ; a $v_nhash vector
319  weak-table.index                      ; index for next entry
320  weak-table.grow-threshold             ; number of entries left in vector
321  )
322
323(defun make-weak-table (&optional (size 20))
324  (%istruct 'weak-table
325            (%cons-nhash-vector
326             size (+ (ash 1 $nhash_weak_bit)))
327            0
328            size))
329
330(defun weak-table-p (weak-table)
331  (istruct-typep weak-table 'weak-table))
332
333(setf (type-predicate 'weak-table) 'weak-table-p)
334
335(defun weak-table-count (weak-table)
336  (setq weak-table (require-type weak-table 'weak-table))
337  (- (weak-table.index weak-table)
338     (nhash.vector.weak-deletions-count (weak-table.vector weak-table))))
339
340(defun weak-table-push (key weak-table &optional value)
341  (setq weak-table (require-type weak-table 'weak-table))
342  (let ((thresh (weak-table.grow-threshold weak-table))
343        (vector (weak-table.vector weak-table))
344        (index (weak-table.index weak-table)))
345    (declare (fixnum thresh index))
346    (if (> thresh 0)
347      (progn
348        (lap-inline (index)
349          (:variable vector key value)
350          (move.l (varg vector) atemp0)
351          (lea (atemp0 arg_z.l $nhash_data) atemp0)
352          (move.l (varg key) atemp0@+)
353          (move.l (varg value) @atemp0))
354        (setf (weak-table.index weak-table) (the fixnum (1+ index))
355              (weak-table.grow-threshold weak-table) (the fixnum (1- thresh)))
356        value)
357      (let ((deletions (nhash.vector.weak-deletions-count vector)))
358        (declare (fixnum deletions))
359        (if (> deletions 0)
360          ; GC deleted some entries, we can compact the table
361          (progn
362            (lap-inline (index)
363              (:variable vector)
364              (getint arg_z)            ; length
365              (move.l (varg vector) atemp0)
366              (lea (atemp0 $nhash_data) atemp0)
367              (move.l atemp0 atemp1)
368              (move.l ($ $undefined) da)
369              ; Find the first deleted entry
370              (dbfloop.l arg_z
371                (if# (ne (cmp.l @atemp0 da))
372                  (add.l ($ 1) arg_z)
373                  (bra @move))
374                (add.w ($ 8) atemp0))
375              ; copy the rest of the table up
376              @move
377              (dbfloop.l arg_z
378                (move.l atemp0@+ db)
379                (if# (eq (cmp.l db da))
380                  (add.w ($ 4) atemp0)
381                 else#
382                  (move.l db atemp1@+)
383                  (move.l atemp0@+ atemp1@+)))
384              ; Write over the newly emptied part of the table
385              (while# (ne (cmp.l atemp0 atemp1))
386                (move.l da @atemp1)
387                (add.l ($ 8) atemp1)))
388            (setf (nhash.vector.weak-deletions-count vector) 0
389                  (weak-table.index weak-table) (the fixnum (- index deletions))
390                  (weak-table.grow-threshold weak-table) (the fixnum (+ thresh deletions)))
391            (weak-table-push key weak-table value))
392          ; table is full.  Grow it by a factor of 1.5
393          (let* ((new-size (+ index (the fixnum (ash (the fixnum (1+ index)) -1))))
394                 (new-vector (%cons-nhash-vector new-size (ash 1 $nhash_weak_bit))))
395            (declare (fixnum new-size))
396            (lap-inline (index)
397              (:variable vector new-vector count)
398              (move.l (varg vector) atemp0)
399              (move.l (varg new-vector) atemp1)
400              (lea (atemp0 $nhash_data) atemp0)
401              (lea (atemp1 $nhash_data) atemp1)
402              (getint arg_z)            ; table length
403              (dbfloop.l arg_z
404                (move.l atemp0@+ atemp1@+)
405                (move.l atemp0@+ atemp1@+)))
406            (setf (weak-table.vector weak-table) new-vector
407                  (weak-table.grow-threshold weak-table) (the fixnum (- new-size index)))
408            ; It's possible that GC deleted some entries while consing the new vector
409            (setf (nhash.vector.weak-deletions-count new-vector)
410                  (nhash.vector.weak-deletions-count vector))
411            (weak-table-push key weak-table value)))))))
412
413; function gets two args: key & value
414(defun map-weak-table (function weak-table)
415  (setq weak-table (require-type weak-table 'weak-table))
416  (let* ((vector (weak-table.vector weak-table))
417         (index (weak-table.index weak-table))
418         (flags (nhash.vector.flags vector)))
419    (unwind-protect
420      (progn
421        (setf (nhash.vector.flags vector) 0)    ; disable deletion by GC
422        (lap-inline ()
423          (:variable function vector index)
424          (while# (gt (move.l (varg index) da))
425            (sub.l '1 da)
426            (move.l da (varg index))
427            (move.l (varg vector) atemp0)
428            (move.l (atemp0 da.l $nhash_data) arg_y)
429            (if# (ne (cmp.w ($ $undefined) arg_y))
430              (move.l (atemp0 da.l (+ $nhash_data 4)) arg_z)
431              (set_nargs 2)
432              (move.l (varg function) atemp0)
433              (jsr_subprim $sp-funcall))))
434        nil)
435      (setf (nhash.vector.flags vector) flags))))
436
437; function gets one arg, the key
438(defun map-weak-table-keys (function weak-table)
439  (flet ((f (key value)
440           (declare (ignore value))
441           (funcall function key)))
442    (declare (dynamic-extent #'f))
443    (map-weak-table #'f weak-table)))
444   
445) ; #+not-yet
446
447; end
Note: See TracBrowser for help on using the repository browser.