source: trunk/source/lib/hash.lisp @ 8538

Last change on this file since 8538 was 8538, checked in by gb, 13 years ago

NEXT-HASH-TABLE-ITERATION-1: return a single NULL value at end of iteration.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.4 KB
Line 
1;;;-*- Mode: Lisp; Package: CCL -*-
2;;;
3;;;   Copyright (C) 1994-2001 Digitool, Inc
4;;;   This file is part of OpenMCL. 
5;;;
6;;;   OpenMCL is licensed under the terms of the Lisp Lesser GNU Public
7;;;   License , known as the LLGPL and distributed with OpenMCL as the
8;;;   file "LICENSE".  The LLGPL consists of a preamble and the LGPL,
9;;;   which is distributed with OpenMCL as the file "LGPL".  Where these
10;;;   conflict, the preamble takes precedence. 
11;;;
12;;;   OpenMCL is referenced in the preamble as the "LIBRARY."
13;;;
14;;;   The LLGPL is also available online at
15;;;   http://opensource.franz.com/preamble.html
16
17;;  This is just the stuff (make-load-form, print-object) that can't be fasloaded earlier.
18
19
20;;;;;;;;;;;;;
21;;
22;; hash.lisp
23;; New hash table implementation
24
25;;;;;;;;;;;;;
26;;
27;; Things I didn't do
28;;
29;; Save the 32-bit hash code along with the key so that growing the table can
30;; avoid calling the hashing function (at least until a GC happens during growing).
31;;
32;; Maybe use Knuth's better method for hashing:
33;; find two primes N-2, N.  N is the table size.
34;; First probe is at primary = (mod (funcall (nhash.keytransF h) key) N)
35;; Secondary probes are spaced by (mod (funcall (nhash.keytransF h) key) N-2)
36;; This does a bit better scrambling of the secondary probes, but costs another divide.
37;;
38;; Rethink how finalization is reported to the user.  Maybe have a finalization function which
39;; is called with the hash table and the deleted key & value.
40
41
42;;;;;;;;;;;;;
43;;
44;; Documentation
45;;
46;; MAKE-HASH-TABLE is extended to accept a :HASH-FUNCTION keyword arg which
47;; defaults for the 4 Common Lisp defined :TEST's.  Also, any fbound symbol can
48;; be used for the :TEST argument.  The HASH-FUNCTION is a function of one
49;; argument, the key, which returns one or two values:
50;;
51;; 1) HASH-CODE
52;; 2) ADDRESSP
53;;
54;; The HASH-CODE can be any object.  If it is a relocateable object (not a
55;; fixnum, short float, or immediate) then ADDRESSP will default to :KEY
56;; and it is an error if NIL is returned for ADDRESSP.
57;;
58;; If ADDRESSP is NIL, the hashing code assumes that no addresses were used
59;; in computing the HASH-CODE.  If ADDRESSP is :KEY (which is the default
60;; if the hash function returns only one value and it is relocateable) then
61;; the hashing code assumes that only the KEY's address was used to compute
62;; the HASH-CODE.  Otherwise, it is assumed that the address of a
63;; component of the key was used to compute the HASH-CODE.
64;;
65;;
66;;
67;; Some (proposed) functions for using in user hashing functions:
68;;
69;; (HASH-CODE object)
70;;
71;; returns two values:
72;;
73;; 1) HASH-CODE
74;; 2) ADDRESSP
75;;
76;; HASH-CODE is the object transformed into a fixnum by changing its tag
77;; bits to a fixnum's tag.  ADDRESSP is true if the object was
78;; relocateable. ;;
79;;
80;; (FIXNUM-ADD o1 o2)
81;; Combines two objects additively and returns a fixnum.
82;; If the two objects are fixnums, will be the same as (+ o1 o2) except
83;; that the result can not be a bignum.
84;;
85;; (FIXNUM-MULTIPLY o1 o2)
86;; Combines two objects multiplicatively and returns a fixnum.
87;;
88;; (FIXNUM-FLOOR dividend &optional divisor)
89;; Same as Common Lisp's FLOOR function, but converts the objects into
90;; fixnums before doing the divide and returns two fixnums: quotient &
91;; remainder.
92;;
93;;;;;;;;;;;;;
94;;
95;; Implementation details.
96;;
97;; Hash table vectors have a header that the garbage collector knows
98;; about followed by alternating keys and values.  Empty slots have a
99;; key of (%UNBOUND-MARKER), deleted slots are denoted by a key of
100;; (%SLOT-UNBOUND-MARKER).
101;;
102;; Four bits in the nhash.vector.flags fixnum interact with the garbage
103;; collector.  This description uses the symbols that represent bit numbers
104;; in a fixnum.  $nhash_xxx_bit has a corresponding $nhash_lap_xxx_bit which
105;; gives the byte offset of the bit for LAP code.  The two bytes in
106;; question are at offsets $nhash.vector-weak-byte and
107;; $nhash.vector-track-keys-byte offsets from the tagged vector.
108;; The 32 bits of the fixnum at nhash.vector.flags look like:
109;;
110;;     TK0C0000 00000000 WVF00000 00000000
111;;
112;;
113;; $nhash_track_keys_bit         "T" in the diagram above
114;;                               Sign bit of the longword at $nhash.vector.flags
115;;                               or the byte at $nhash.vector-track-keys-byte.
116;;                               If set, GC tracks relocation of keys in the
117;;                               vector.
118;; $nhash_key_moved_bit          "K" in the diagram above
119;;                               Set by GC to indicate that a key moved.
120;;                               If $nhash_track_keys_bit is clear, this bit is set to
121;;                               indicate that any GC will require a rehash.
122;;                               GC never clears this bit, but may set it if
123;;                               $nhash_track_keys_bit is set.
124;; $nhash_component_address_bit  "C" in the diagram above.
125;;                               Ignored by GC.  Set to indicate that the
126;;                               address of a component of a key was used.
127;;                               Means that $nhash_track_keys_bit will
128;;                               never be set until all such keys are
129;;                               removed.
130;; $nhash_weak_bit               "W" in the diagram above
131;;                               Sign bit of the byte at $nhash.vector-weak-byte
132;;                               Set to indicate a weak hash table
133;; $nhash_weak_value_bit         "V" in the diagram above
134;;                               If clear, the table is weak on key
135;;                               If set, the table is weak on value
136;; $nhash_finalizeable_bit       "F" in the diagram above
137;;                               If set the table is finalizeable:
138;;                               If any key/value pairs are removed, they will be added to
139;;                               the nhash.vector.finalization-alist using cons cells
140;;                               from nhash.vector.free-alist
141
142(in-package "CCL")
143
144
145(eval-when (:compile-toplevel :execute)
146  (require "HASHENV" "ccl:xdump;hashenv"))
147
148(defvar *hash-table-class*
149  (progn
150;    #+sparc-target (dbg)
151    (make-built-in-class 'hash-table *istruct-class*)))
152
153(setf (type-predicate 'hash-table) 'hash-table-p)
154
155
156(defmethod print-object ((table hash-table) stream)
157  (print-unreadable-object (table stream :type t :identity t)
158    (format stream "~S ~S size ~D/~D"
159            ':test (hash-table-test table)
160            (hash-table-count table)
161            (hash-table-size table))
162    (when (readonly-hash-table-p table)
163      (format stream " (Readonly)"))))
164
165
166;;; Of course, the lisp version of this would be too slow ...
167(defun hash-table-finalization-list (hash-table)
168  (unless (hash-table-p hash-table)
169    (report-bad-arg hash-table 'hash-table))
170  (let* ((vector (nhash.vector hash-table))
171         (flags (nhash.vector.flags vector)))
172    (declare (fixnum flags))
173    (if (logbitp $nhash_finalizeable_bit flags)
174      (nhash.vector.finalization-alist vector)
175      (error "~S is not a finalizeable hash table" hash-table))))
176
177(defun (setf hash-table-finalization-list) (value hash-table)
178  (unless (hash-table-p hash-table)
179    (report-bad-arg hash-table 'hash-table))
180  (let* ((vector (nhash.vector hash-table))
181         (flags (nhash.vector.flags vector)))
182    (declare (fixnum flags))
183    (if (logbitp $nhash_finalizeable_bit flags)
184      (setf (nhash.vector.finalization-alist vector) value)
185      (error "~S is not a finalizeable hash table" hash-table))))
186
187(defsetf gethash puthash)
188
189; Returns nil, :key or :value
190(defun hash-table-weak-p (hash)
191  (unless (hash-table-p hash)
192    (setq hash (require-type hash 'hash-table)))
193  (let* ((vector (nhash.vector hash))
194         (flags (nhash.vector.flags vector)))
195    (when (logbitp $nhash_weak_bit flags)
196      (if (logbitp $nhash_weak_value_bit flags)
197        :value
198        :key))))
199
200;;; It would be pretty complicated to offer a way of doing (SETF
201;;; HASH-TABLE-WEAK-P) after the hash-table's been created, and
202;;; it's not clear that that'd be incredibly useful.
203
204
205
206;;;;;;;;;;;;;
207;;
208;; Mapping functions
209;;
210
211
212
213(defun next-hash-table-iteration-1 (state)
214  (do* ((index (nhti.index state) (1+ index))
215        (keys (nhti.keys state))
216        (values (nhti.values state))
217        (nkeys (nhti.nkeys state)))
218       ((>= index nkeys)
219        (setf (nhti.index state) nkeys)
220        nil)
221    (declare (fixnum index nkeys)
222             (simple-vector keys))
223    (let* ((key (svref keys index))
224           (value (svref values index)))
225        (setf (nhti.index state) (1+ index))
226        (return (values t key value)))))
227
228
229
230(defun maphash (function hash-table)
231  "For each entry in HASH-TABLE, call the designated two-argument function
232   on the key and value of the entry. Return NIL."
233  (with-hash-table-iterator (m hash-table)
234    (loop
235      (multiple-value-bind (found key value) (m)
236        (unless found (return))
237        (funcall function key value)))))
238
239
240
241(defmethod make-load-form ((hash hash-table) &optional env)
242  (declare (ignore env))
243  (let ((rehashF (function-name (nhash.rehashF hash)))
244        (keytransF (nhash.keytransF hash))
245        (compareF (nhash.compareF hash))
246        (vector (nhash.vector hash))
247        (private (if (nhash.owner hash) '*current-process*))
248        (count (nhash.count hash)))
249    (flet ((convert (f)
250             (if (or (fixnump f) (symbolp f))
251               `',f
252               `(symbol-function ',(function-name f)))))
253      (values
254       `(%cons-hash-table
255         nil nil nil nil ,(nhash.grow-threshold hash) ,(nhash.rehash-ratio hash) ,(nhash.rehash-size hash) ,(nhash.address-based hash) nil nil ,private)
256       `(%initialize-hash-table ,hash ',rehashF ,(convert keytransF) ,(convert compareF)
257                                ',vector ,count)))))
258
259(defun needs-rehashing (hash)
260  (%set-needs-rehashing hash))
261
262(defun %initialize-hash-table (hash rehashF keytransF compareF vector count)
263  (setf (nhash.rehashF hash) (symbol-function rehashF)
264        (nhash.keytransF hash) keytransF
265        (nhash.compareF hash) compareF
266        (nhash.vector hash) vector
267        (nhash.count hash) count)
268  (setf (nhash.find hash)
269        (case comparef
270          (0 #'eq-hash-find)
271          (-1 #'eql-hash-find)
272          (t #'general-hash-find))
273        (nhash.find-new hash)
274        (case comparef
275          (0 #'eq-hash-find-for-put)
276          (-1 #'eql-hash-find-for-put)
277          (t #'general-hash-find-for-put)))
278  (%set-needs-rehashing hash))
279
280
281;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
282;;
283;; Support for locking hash tables while fasdumping
284;;
285
286
287(defun fasl-lock-hash-table (hash-table)
288  (setq hash-table (require-type hash-table 'hash-table))
289  (without-interrupts
290   (let* ((lock (nhash.exclusion-lock hash-table)))
291     (if lock
292       (write-lock-rwlock lock)
293       (progn
294         (unless (eq (nhash.owner hash-table) *current-process*)
295           (error "Current process doesn't own hash-table ~s" hash-table))))
296     (push hash-table *fcomp-locked-hash-tables*))))
297
298(defun fasl-unlock-hash-tables ()
299  (dolist (h *fcomp-locked-hash-tables*)
300    (let* ((lock (nhash.exclusion-lock h)))
301      (if lock (unlock-rwlock lock)))))
302
303
304
305             
306
307#+not-yet
308(progn
309;;;;;;;;;;;;;
310;;
311;; Replacement for population
312;;
313(def-accessors (weak-table) %svref
314  nil                                   ; 'weak-table
315  weak-table.vector                     ; a $v_nhash vector
316  weak-table.index                      ; index for next entry
317  weak-table.grow-threshold             ; number of entries left in vector
318  )
319
320(defun make-weak-table (&optional (size 20))
321  (%istruct 'weak-table
322            (%cons-nhash-vector
323             size (+ (ash 1 $nhash_weak_bit)))
324            0
325            size))
326
327(defun weak-table-p (weak-table)
328  (istruct-typep weak-table 'weak-table))
329
330(setf (type-predicate 'weak-table) 'weak-table-p)
331
332(defun weak-table-count (weak-table)
333  (setq weak-table (require-type weak-table 'weak-table))
334  (- (weak-table.index weak-table)
335     (nhash.vector.weak-deletions-count (weak-table.vector weak-table))))
336
337(defun weak-table-push (key weak-table &optional value)
338  (setq weak-table (require-type weak-table 'weak-table))
339  (let ((thresh (weak-table.grow-threshold weak-table))
340        (vector (weak-table.vector weak-table))
341        (index (weak-table.index weak-table)))
342    (declare (fixnum thresh index))
343    (if (> thresh 0)
344      (progn
345        (lap-inline (index)
346          (:variable vector key value)
347          (move.l (varg vector) atemp0)
348          (lea (atemp0 arg_z.l $nhash_data) atemp0)
349          (move.l (varg key) atemp0@+)
350          (move.l (varg value) @atemp0))
351        (setf (weak-table.index weak-table) (the fixnum (1+ index))
352              (weak-table.grow-threshold weak-table) (the fixnum (1- thresh)))
353        value)
354      (let ((deletions (nhash.vector.weak-deletions-count vector)))
355        (declare (fixnum deletions))
356        (if (> deletions 0)
357          ; GC deleted some entries, we can compact the table
358          (progn
359            (lap-inline (index)
360              (:variable vector)
361              (getint arg_z)            ; length
362              (move.l (varg vector) atemp0)
363              (lea (atemp0 $nhash_data) atemp0)
364              (move.l atemp0 atemp1)
365              (move.l ($ $undefined) da)
366              ; Find the first deleted entry
367              (dbfloop.l arg_z
368                (if# (ne (cmp.l @atemp0 da))
369                  (add.l ($ 1) arg_z)
370                  (bra @move))
371                (add.w ($ 8) atemp0))
372              ; copy the rest of the table up
373              @move
374              (dbfloop.l arg_z
375                (move.l atemp0@+ db)
376                (if# (eq (cmp.l db da))
377                  (add.w ($ 4) atemp0)
378                 else#
379                  (move.l db atemp1@+)
380                  (move.l atemp0@+ atemp1@+)))
381              ; Write over the newly emptied part of the table
382              (while# (ne (cmp.l atemp0 atemp1))
383                (move.l da @atemp1)
384                (add.l ($ 8) atemp1)))
385            (setf (nhash.vector.weak-deletions-count vector) 0
386                  (weak-table.index weak-table) (the fixnum (- index deletions))
387                  (weak-table.grow-threshold weak-table) (the fixnum (+ thresh deletions)))
388            (weak-table-push key weak-table value))
389          ; table is full.  Grow it by a factor of 1.5
390          (let* ((new-size (+ index (the fixnum (ash (the fixnum (1+ index)) -1))))
391                 (new-vector (%cons-nhash-vector new-size (ash 1 $nhash_weak_bit))))
392            (declare (fixnum new-size))
393            (lap-inline (index)
394              (:variable vector new-vector count)
395              (move.l (varg vector) atemp0)
396              (move.l (varg new-vector) atemp1)
397              (lea (atemp0 $nhash_data) atemp0)
398              (lea (atemp1 $nhash_data) atemp1)
399              (getint arg_z)            ; table length
400              (dbfloop.l arg_z
401                (move.l atemp0@+ atemp1@+)
402                (move.l atemp0@+ atemp1@+)))
403            (setf (weak-table.vector weak-table) new-vector
404                  (weak-table.grow-threshold weak-table) (the fixnum (- new-size index)))
405            ; It's possible that GC deleted some entries while consing the new vector
406            (setf (nhash.vector.weak-deletions-count new-vector)
407                  (nhash.vector.weak-deletions-count vector))
408            (weak-table-push key weak-table value)))))))
409
410; function gets two args: key & value
411(defun map-weak-table (function weak-table)
412  (setq weak-table (require-type weak-table 'weak-table))
413  (let* ((vector (weak-table.vector weak-table))
414         (index (weak-table.index weak-table))
415         (flags (nhash.vector.flags vector)))
416    (unwind-protect
417      (progn
418        (setf (nhash.vector.flags vector) 0)    ; disable deletion by GC
419        (lap-inline ()
420          (:variable function vector index)
421          (while# (gt (move.l (varg index) da))
422            (sub.l '1 da)
423            (move.l da (varg index))
424            (move.l (varg vector) atemp0)
425            (move.l (atemp0 da.l $nhash_data) arg_y)
426            (if# (ne (cmp.w ($ $undefined) arg_y))
427              (move.l (atemp0 da.l (+ $nhash_data 4)) arg_z)
428              (set_nargs 2)
429              (move.l (varg function) atemp0)
430              (jsr_subprim $sp-funcall))))
431        nil)
432      (setf (nhash.vector.flags vector) flags))))
433
434; function gets one arg, the key
435(defun map-weak-table-keys (function weak-table)
436  (flet ((f (key value)
437           (declare (ignore value))
438           (funcall function key)))
439    (declare (dynamic-extent #'f))
440    (map-weak-table #'f weak-table)))
441   
442) ; #+not-yet
443
444; end
Note: See TracBrowser for help on using the repository browser.