source: branches/working-0710/ccl/lib/hash.lisp @ 7511

Last change on this file since 7511 was 7511, checked in by gb, 13 years ago

Add READONLY-HASH-TABLE-P and use it in HASH-TABLE's PRINT-OBJECT method.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.2 KB
Line 
1;;;-*- Mode: Lisp; Package: CCL -*-
2;;;
3;;;   Copyright (C) 1994-2001 Digitool, Inc
4;;;   This file is part of OpenMCL. 
5;;;
6;;;   OpenMCL is licensed under the terms of the Lisp Lesser GNU Public
7;;;   License , known as the LLGPL and distributed with OpenMCL as the
8;;;   file "LICENSE".  The LLGPL consists of a preamble and the LGPL,
9;;;   which is distributed with OpenMCL as the file "LGPL".  Where these
10;;;   conflict, the preamble takes precedence. 
11;;;
12;;;   OpenMCL is referenced in the preamble as the "LIBRARY."
13;;;
14;;;   The LLGPL is also available online at
15;;;   http://opensource.franz.com/preamble.html
16
17;;  This is just the stuff (make-load-form, print-object) that can't be fasloaded earlier.
18
19
20;;;;;;;;;;;;;
21;;
22;; hash.lisp
23;; New hash table implementation
24
25;;;;;;;;;;;;;
26;;
27;; Things I didn't do
28;;
29;; Save the 32-bit hash code along with the key so that growing the table can
30;; avoid calling the hashing function (at least until a GC happens during growing).
31;;
32;; Maybe use Knuth's better method for hashing:
33;; find two primes N-2, N.  N is the table size.
34;; First probe is at primary = (mod (funcall (nhash.keytransF h) key) N)
35;; Secondary probes are spaced by (mod (funcall (nhash.keytransF h) key) N-2)
36;; This does a bit better scrambling of the secondary probes, but costs another divide.
37;;
38;; Rethink how finalization is reported to the user.  Maybe have a finalization function which
39;; is called with the hash table and the deleted key & value.
40
41
42;;;;;;;;;;;;;
43;;
44;; Documentation
45;;
46;; MAKE-HASH-TABLE is extended to accept a :HASH-FUNCTION keyword arg which
47;; defaults for the 4 Common Lisp defined :TEST's.  Also, any fbound symbol can
48;; be used for the :TEST argument.  The HASH-FUNCTION is a function of one
49;; argument, the key, which returns one or two values:
50;;
51;; 1) HASH-CODE
52;; 2) ADDRESSP
53;;
54;; The HASH-CODE can be any object.  If it is a relocateable object (not a
55;; fixnum, short float, or immediate) then ADDRESSP will default to :KEY
56;; and it is an error if NIL is returned for ADDRESSP.
57;;
58;; If ADDRESSP is NIL, the hashing code assumes that no addresses were used
59;; in computing the HASH-CODE.  If ADDRESSP is :KEY (which is the default
60;; if the hash function returns only one value and it is relocateable) then
61;; the hashing code assumes that only the KEY's address was used to compute
62;; the HASH-CODE.  Otherwise, it is assumed that the address of a
63;; component of the key was used to compute the HASH-CODE.
64;;
65;;
66;;
67;; Some (proposed) functions for using in user hashing functions:
68;;
69;; (HASH-CODE object)
70;;
71;; returns two values:
72;;
73;; 1) HASH-CODE
74;; 2) ADDRESSP
75;;
76;; HASH-CODE is the object transformed into a fixnum by changing its tag
77;; bits to a fixnum's tag.  ADDRESSP is true if the object was
78;; relocateable. ;;
79;;
80;; (FIXNUM-ADD o1 o2)
81;; Combines two objects additively and returns a fixnum.
82;; If the two objects are fixnums, will be the same as (+ o1 o2) except
83;; that the result can not be a bignum.
84;;
85;; (FIXNUM-MULTIPLY o1 o2)
86;; Combines two objects multiplicatively and returns a fixnum.
87;;
88;; (FIXNUM-FLOOR dividend &optional divisor)
89;; Same as Common Lisp's FLOOR function, but converts the objects into
90;; fixnums before doing the divide and returns two fixnums: quotient &
91;; remainder.
92;;
93;;;;;;;;;;;;;
94;;
95;; Implementation details.
96;;
97;; Hash table vectors have a header that the garbage collector knows
98;; about followed by alternating keys and values.  Empty slots have a
99;; key of (%UNBOUND-MARKER), deleted slots are denoted by a key of
100;; (%SLOT-UNBOUND-MARKER).
101;;
102;; Four bits in the nhash.vector.flags fixnum interact with the garbage
103;; collector.  This description uses the symbols that represent bit numbers
104;; in a fixnum.  $nhash_xxx_bit has a corresponding $nhash_lap_xxx_bit which
105;; gives the byte offset of the bit for LAP code.  The two bytes in
106;; question are at offsets $nhash.vector-weak-byte and
107;; $nhash.vector-track-keys-byte offsets from the tagged vector.
108;; The 32 bits of the fixnum at nhash.vector.flags look like:
109;;
110;;     TK0C0000 00000000 WVF00000 00000000
111;;
112;;
113;; $nhash_track_keys_bit         "T" in the diagram above
114;;                               Sign bit of the longword at $nhash.vector.flags
115;;                               or the byte at $nhash.vector-track-keys-byte.
116;;                               If set, GC tracks relocation of keys in the
117;;                               vector.
118;; $nhash_key_moved_bit          "K" in the diagram above
119;;                               Set by GC to indicate that a key moved.
120;;                               If $nhash_track_keys_bit is clear, this bit is set to
121;;                               indicate that any GC will require a rehash.
122;;                               GC never clears this bit, but may set it if
123;;                               $nhash_track_keys_bit is set.
124;; $nhash_component_address_bit  "C" in the diagram above.
125;;                               Ignored by GC.  Set to indicate that the
126;;                               address of a component of a key was used.
127;;                               Means that $nhash_track_keys_bit will
128;;                               never be set until all such keys are
129;;                               removed.
130;; $nhash_weak_bit               "W" in the diagram above
131;;                               Sign bit of the byte at $nhash.vector-weak-byte
132;;                               Set to indicate a weak hash table
133;; $nhash_weak_value_bit         "V" in the diagram above
134;;                               If clear, the table is weak on key
135;;                               If set, the table is weak on value
136;; $nhash_finalizeable_bit       "F" in the diagram above
137;;                               If set the table is finalizeable:
138;;                               If any key/value pairs are removed, they will be added to
139;;                               the nhash.vector.finalization-alist using cons cells
140;;                               from nhash.vector.free-alist
141
142(in-package "CCL")
143
144
145(eval-when (:compile-toplevel :execute)
146  (require "HASHENV" "ccl:xdump;hashenv"))
147
148(defvar *hash-table-class*
149  (progn
150;    #+sparc-target (dbg)
151    (make-built-in-class 'hash-table *istruct-class*)))
152
153(setf (type-predicate 'hash-table) 'hash-table-p)
154
155
156(defmethod print-object ((table hash-table) stream)
157  (print-unreadable-object (table stream :type t :identity t)
158    (format stream "~S ~S size ~D/~D"
159            ':test (hash-table-test table)
160            (hash-table-count table)
161            (hash-table-size table))
162    (when (readonly-hash-table-p table)
163      (format stream " (Readonly)"))))
164
165
166;;; Of course, the lisp version of this would be too slow ...
167(defun hash-table-finalization-list (hash-table)
168  (unless (hash-table-p hash-table)
169    (report-bad-arg hash-table 'hash-table))
170  (let* ((vector (nhash.vector hash-table))
171         (flags (nhash.vector.flags vector)))
172    (declare (fixnum flags))
173    (if (logbitp $nhash_finalizeable_bit flags)
174      (nhash.vector.finalization-alist vector)
175      (error "~S is not a finalizeable hash table" hash-table))))
176
177(defun (setf hash-table-finalization-list) (value hash-table)
178  (unless (hash-table-p hash-table)
179    (report-bad-arg hash-table 'hash-table))
180  (let* ((vector (nhash.vector hash-table))
181         (flags (nhash.vector.flags vector)))
182    (declare (fixnum flags))
183    (if (logbitp $nhash_finalizeable_bit flags)
184      (setf (nhash.vector.finalization-alist vector) value)
185      (error "~S is not a finalizeable hash table" hash-table))))
186
187(defsetf gethash puthash)
188
189; Returns nil, :key or :value
190(defun hash-table-weak-p (hash)
191  (unless (hash-table-p hash)
192    (setq hash (require-type hash 'hash-table)))
193  (let* ((vector (nhash.vector hash))
194         (flags (nhash.vector.flags vector)))
195    (when (logbitp $nhash_weak_bit flags)
196      (if (logbitp $nhash_weak_value_bit flags)
197        :value
198        :key))))
199
200;;; It would be pretty complicated to offer a way of doing (SETF
201;;; HASH-TABLE-WEAK-P) after the hash-table's been created, and
202;;; it's not clear that that'd be incredibly useful.
203
204
205
206;;;;;;;;;;;;;
207;;
208;; Mapping functions
209;;
210
211
212(defun start-hash-table-iterator (hash state)
213  (let (vector locked)
214    (unless (hash-table-p hash)
215      (setf (hti.hash-table state) nil)         ; for finish-hash-table-iterator
216      (report-bad-arg hash 'hash-table))
217
218    (without-interrupts
219     (setf (hti.hash-table state) hash)
220     (setf (hti.lock state) (setq locked (not (eq :readonly (lock-hash-table-for-map hash)))))
221     (when locked (%lock-gc-lock))
222     (setq vector (nhash.vector hash))
223     (setf (hti.vector state) vector)
224     (setf (hti.index state) (nhash.vector-size vector))
225     (setf (hti.prev-iterator state) (nhash.iterator hash)
226           (nhash.iterator hash) state)
227     (when (%needs-rehashing-p hash)
228       (%rehash hash)))))
229 
230;;; this is as fast as the lappy version
231
232(defun do-hash-table-iteration (state)
233  (let ((vector (hti.vector state))
234        (index (hti.index state))
235        key value)
236    (declare (optimize (speed 3) (safety 0)))
237    (if (setf (hti.index state)
238              (if index
239                (loop
240                  (if (eq index 0)(return (setq index nil)))
241                  (locally (declare (fixnum index))
242                    (setq index (- index 1))
243                    (let* ((vector-index (index->vector-index index)))
244                      (declare (fixnum vector-index))
245                      (setq key (%svref vector vector-index))
246                      (unless (or (eq key (%unbound-marker))
247                                  (eq key (%slot-unbound-marker)))
248                        (setq value (%svref vector (the fixnum (1+ vector-index))))
249                        (return index)))))))
250      (let* ((hash (hti.hash-table state)))
251        (setf (nhash.vector.cache-idx (setq vector (nhash.vector hash))) index
252              (nhash.vector.cache-key vector) key
253              (nhash.vector.cache-value vector) value)
254        (values t key value)))))
255
256(defun finish-hash-table-iterator (state)
257  (without-interrupts
258   (let ((hash (hti.hash-table state))
259         (locked (hti.lock state)))
260     (when hash
261       (setf (hti.hash-table state) nil)
262       (when locked
263         (unlock-hash-table hash nil)
264         (%unlock-gc-lock))
265       (when (eq state (nhash.iterator hash))
266         (setf (nhash.iterator hash) (hti.prev-iterator state)))
267       (setf
268        (hti.index state)  nil
269        (hti.vector state) nil
270        (hti.lock state)   nil)))))
271
272(defun maphash (function hash-table)
273  "For each entry in HASH-TABLE, call the designated two-argument function
274   on the key and value of the entry. Return NIL."
275  (with-hash-table-iterator (m hash-table)
276    (loop
277      (multiple-value-bind (found key value) (m)
278        (unless found (return))
279        (funcall function key value)))))
280
281
282
283(defmethod make-load-form ((hash hash-table) &optional env)
284  (declare (ignore env))
285  (let ((rehashF (function-name (nhash.rehashF hash)))
286        (keytransF (nhash.keytransF hash))
287        (compareF (nhash.compareF hash))
288        (vector (nhash.vector hash))
289        (private (if (nhash.owner hash) '*current-process*))
290        (count (nhash.count hash)))
291    (flet ((convert (f)
292             (if (or (fixnump f) (symbolp f))
293               `',f
294               `(symbol-function ',(function-name f)))))
295      (values
296       `(%cons-hash-table
297         nil nil nil nil ,(nhash.grow-threshold hash) ,(nhash.rehash-ratio hash) ,(nhash.rehash-size hash) ,(nhash.address-based hash) nil nil ,private)
298       `(%initialize-hash-table ,hash ',rehashF ,(convert keytransF) ,(convert compareF)
299                                ',vector ,count)))))
300
301(defun needs-rehashing (hash)
302  (%set-needs-rehashing hash))
303
304(defun %initialize-hash-table (hash rehashF keytransF compareF vector count)
305  (setf (nhash.rehashF hash) (symbol-function rehashF)
306        (nhash.keytransF hash) keytransF
307        (nhash.compareF hash) compareF
308        (nhash.vector hash) vector
309        (nhash.count hash) count)
310  (setf (nhash.find hash)
311        (case comparef
312          (0 #'eq-hash-find)
313          (-1 #'eql-hash-find)
314          (t #'general-hash-find))
315        (nhash.find-new hash)
316        (case comparef
317          (0 #'eq-hash-find-for-put)
318          (-1 #'eql-hash-find-for-put)
319          (t #'general-hash-find-for-put)))
320  (%set-needs-rehashing hash))
321
322
323;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
324;;
325;; Support for locking hash tables while fasdumping
326;;
327
328
329(defun fasl-lock-hash-table (hash-table)
330  (setq hash-table (require-type hash-table 'hash-table))
331  (without-interrupts
332   (let* ((lock (nhash.exclusion-lock hash-table)))
333     (if lock
334       (write-lock-rwlock lock)
335       (progn
336         (unless (eq (nhash.owner hash-table) *current-process*)
337           (error "Current process doesn't own hash-table ~s" hash-table))))
338     (push hash-table *fcomp-locked-hash-tables*))))
339
340(defun fasl-unlock-hash-tables ()
341  (dolist (h *fcomp-locked-hash-tables*)
342    (let* ((lock (nhash.exclusion-lock h)))
343      (if lock (unlock-rwlock lock)))))
344
345
346
347             
348
349#+not-yet
350(progn
351;;;;;;;;;;;;;
352;;
353;; Replacement for population
354;;
355(def-accessors (weak-table) %svref
356  nil                                   ; 'weak-table
357  weak-table.vector                     ; a $v_nhash vector
358  weak-table.index                      ; index for next entry
359  weak-table.grow-threshold             ; number of entries left in vector
360  )
361
362(defun make-weak-table (&optional (size 20))
363  (%istruct 'weak-table
364            (%cons-nhash-vector
365             size (+ (ash 1 $nhash_weak_bit)))
366            0
367            size))
368
369(defun weak-table-p (weak-table)
370  (istruct-typep weak-table 'weak-table))
371
372(setf (type-predicate 'weak-table) 'weak-table-p)
373
374(defun weak-table-count (weak-table)
375  (setq weak-table (require-type weak-table 'weak-table))
376  (- (weak-table.index weak-table)
377     (nhash.vector.weak-deletions-count (weak-table.vector weak-table))))
378
379(defun weak-table-push (key weak-table &optional value)
380  (setq weak-table (require-type weak-table 'weak-table))
381  (let ((thresh (weak-table.grow-threshold weak-table))
382        (vector (weak-table.vector weak-table))
383        (index (weak-table.index weak-table)))
384    (declare (fixnum thresh index))
385    (if (> thresh 0)
386      (progn
387        (lap-inline (index)
388          (:variable vector key value)
389          (move.l (varg vector) atemp0)
390          (lea (atemp0 arg_z.l $nhash_data) atemp0)
391          (move.l (varg key) atemp0@+)
392          (move.l (varg value) @atemp0))
393        (setf (weak-table.index weak-table) (the fixnum (1+ index))
394              (weak-table.grow-threshold weak-table) (the fixnum (1- thresh)))
395        value)
396      (let ((deletions (nhash.vector.weak-deletions-count vector)))
397        (declare (fixnum deletions))
398        (if (> deletions 0)
399          ; GC deleted some entries, we can compact the table
400          (progn
401            (lap-inline (index)
402              (:variable vector)
403              (getint arg_z)            ; length
404              (move.l (varg vector) atemp0)
405              (lea (atemp0 $nhash_data) atemp0)
406              (move.l atemp0 atemp1)
407              (move.l ($ $undefined) da)
408              ; Find the first deleted entry
409              (dbfloop.l arg_z
410                (if# (ne (cmp.l @atemp0 da))
411                  (add.l ($ 1) arg_z)
412                  (bra @move))
413                (add.w ($ 8) atemp0))
414              ; copy the rest of the table up
415              @move
416              (dbfloop.l arg_z
417                (move.l atemp0@+ db)
418                (if# (eq (cmp.l db da))
419                  (add.w ($ 4) atemp0)
420                 else#
421                  (move.l db atemp1@+)
422                  (move.l atemp0@+ atemp1@+)))
423              ; Write over the newly emptied part of the table
424              (while# (ne (cmp.l atemp0 atemp1))
425                (move.l da @atemp1)
426                (add.l ($ 8) atemp1)))
427            (setf (nhash.vector.weak-deletions-count vector) 0
428                  (weak-table.index weak-table) (the fixnum (- index deletions))
429                  (weak-table.grow-threshold weak-table) (the fixnum (+ thresh deletions)))
430            (weak-table-push key weak-table value))
431          ; table is full.  Grow it by a factor of 1.5
432          (let* ((new-size (+ index (the fixnum (ash (the fixnum (1+ index)) -1))))
433                 (new-vector (%cons-nhash-vector new-size (ash 1 $nhash_weak_bit))))
434            (declare (fixnum new-size))
435            (lap-inline (index)
436              (:variable vector new-vector count)
437              (move.l (varg vector) atemp0)
438              (move.l (varg new-vector) atemp1)
439              (lea (atemp0 $nhash_data) atemp0)
440              (lea (atemp1 $nhash_data) atemp1)
441              (getint arg_z)            ; table length
442              (dbfloop.l arg_z
443                (move.l atemp0@+ atemp1@+)
444                (move.l atemp0@+ atemp1@+)))
445            (setf (weak-table.vector weak-table) new-vector
446                  (weak-table.grow-threshold weak-table) (the fixnum (- new-size index)))
447            ; It's possible that GC deleted some entries while consing the new vector
448            (setf (nhash.vector.weak-deletions-count new-vector)
449                  (nhash.vector.weak-deletions-count vector))
450            (weak-table-push key weak-table value)))))))
451
452; function gets two args: key & value
453(defun map-weak-table (function weak-table)
454  (setq weak-table (require-type weak-table 'weak-table))
455  (let* ((vector (weak-table.vector weak-table))
456         (index (weak-table.index weak-table))
457         (flags (nhash.vector.flags vector)))
458    (unwind-protect
459      (progn
460        (setf (nhash.vector.flags vector) 0)    ; disable deletion by GC
461        (lap-inline ()
462          (:variable function vector index)
463          (while# (gt (move.l (varg index) da))
464            (sub.l '1 da)
465            (move.l da (varg index))
466            (move.l (varg vector) atemp0)
467            (move.l (atemp0 da.l $nhash_data) arg_y)
468            (if# (ne (cmp.w ($ $undefined) arg_y))
469              (move.l (atemp0 da.l (+ $nhash_data 4)) arg_z)
470              (set_nargs 2)
471              (move.l (varg function) atemp0)
472              (jsr_subprim $sp-funcall))))
473        nil)
474      (setf (nhash.vector.flags vector) flags))))
475
476; function gets one arg, the key
477(defun map-weak-table-keys (function weak-table)
478  (flet ((f (key value)
479           (declare (ignore value))
480           (funcall function key)))
481    (declare (dynamic-extent #'f))
482    (map-weak-table #'f weak-table)))
483   
484) ; #+not-yet
485
486; end
Note: See TracBrowser for help on using the repository browser.