source: branches/qres/ccl/lib/hash.lisp @ 13564

Last change on this file since 13564 was 13070, checked in by gz, 10 years ago

r13066, r13067 from trunk: copyrights etc

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.9 KB
Line 
1;;;-*- Mode: Lisp; Package: CCL -*-
2;;;
3;;;   Copyright (C) 2009 Clozure Associates
4;;;   Copyright (C) 1994-2001 Digitool, Inc
5;;;   This file is part of Clozure CL. 
6;;;
7;;;   Clozure CL is licensed under the terms of the Lisp Lesser GNU Public
8;;;   License , known as the LLGPL and distributed with Clozure CL as the
9;;;   file "LICENSE".  The LLGPL consists of a preamble and the LGPL,
10;;;   which is distributed with Clozure CL as the file "LGPL".  Where these
11;;;   conflict, the preamble takes precedence. 
12;;;
13;;;   Clozure CL is referenced in the preamble as the "LIBRARY."
14;;;
15;;;   The LLGPL is also available online at
16;;;   http://opensource.franz.com/preamble.html
17
18;;  This is just the stuff (make-load-form, print-object) that can't be fasloaded earlier.
19
20
21;;;;;;;;;;;;;
22;;
23;; hash.lisp
24;; New hash table implementation
25
26;;;;;;;;;;;;;
27;;
28;; Things I didn't do
29;;
30;; Save the 32-bit hash code along with the key so that growing the table can
31;; avoid calling the hashing function (at least until a GC happens during growing).
32;;
33;; Maybe use Knuth's better method for hashing:
34;; find two primes N-2, N.  N is the table size.
35;; First probe is at primary = (mod (funcall (nhash.keytransF h) key) N)
36;; Secondary probes are spaced by (mod (funcall (nhash.keytransF h) key) N-2)
37;; This does a bit better scrambling of the secondary probes, but costs another divide.
38;;
39;; Rethink how finalization is reported to the user.  Maybe have a finalization function which
40;; is called with the hash table and the deleted key & value.
41
42
43;;;;;;;;;;;;;
44;;
45;; Documentation
46;;
47;; MAKE-HASH-TABLE is extended to accept a :HASH-FUNCTION keyword arg which
48;; defaults for the 4 Common Lisp defined :TEST's.  Also, any fbound symbol can
49;; be used for the :TEST argument.  The HASH-FUNCTION is a function of one
50;; argument, the key, which returns one or two values:
51;;
52;; 1) HASH-CODE
53;; 2) ADDRESSP
54;;
55;; The HASH-CODE can be any object.  If it is a relocateable object (not a
56;; fixnum, short float, or immediate) then ADDRESSP will default to :KEY
57;; and it is an error if NIL is returned for ADDRESSP.
58;;
59;; If ADDRESSP is NIL, the hashing code assumes that no addresses were used
60;; in computing the HASH-CODE.  If ADDRESSP is :KEY (which is the default
61;; if the hash function returns only one value and it is relocateable) then
62;; the hashing code assumes that only the KEY's address was used to compute
63;; the HASH-CODE.  Otherwise, it is assumed that the address of a
64;; component of the key was used to compute the HASH-CODE.
65;;
66;;
67;;
68;; Some (proposed) functions for using in user hashing functions:
69;;
70;; (HASH-CODE object)
71;;
72;; returns two values:
73;;
74;; 1) HASH-CODE
75;; 2) ADDRESSP
76;;
77;; HASH-CODE is the object transformed into a fixnum by changing its tag
78;; bits to a fixnum's tag.  ADDRESSP is true if the object was
79;; relocateable. ;;
80;;
81;; (FIXNUM-ADD o1 o2)
82;; Combines two objects additively and returns a fixnum.
83;; If the two objects are fixnums, will be the same as (+ o1 o2) except
84;; that the result can not be a bignum.
85;;
86;; (FIXNUM-MULTIPLY o1 o2)
87;; Combines two objects multiplicatively and returns a fixnum.
88;;
89;; (FIXNUM-FLOOR dividend &optional divisor)
90;; Same as Common Lisp's FLOOR function, but converts the objects into
91;; fixnums before doing the divide and returns two fixnums: quotient &
92;; remainder.
93;;
94;;;;;;;;;;;;;
95;;
96;; Implementation details.
97;;
98;; Hash table vectors have a header that the garbage collector knows
99;; about followed by alternating keys and values.  Empty slots have a
100;; key of (%UNBOUND-MARKER), deleted slots are denoted by a key of
101;; (%SLOT-UNBOUND-MARKER), except in the case of "lock-free" hash
102;; tables, which see below.
103;;
104;; Four bits in the nhash.vector.flags fixnum interact with the garbage
105;; collector.  This description uses the symbols that represent bit numbers
106;; in a fixnum.  $nhash_xxx_bit has a corresponding $nhash_lap_xxx_bit which
107;; gives the byte offset of the bit for LAP code.  The two bytes in
108;; question are at offsets $nhash.vector-weak-byte and
109;; $nhash.vector-track-keys-byte offsets from the tagged vector.
110;; The raw 32 bits of the fixnum at nhash.vector.flags look like:
111;;
112;;     TKEC0000 00000000 WVFZ0000 00000000
113;;
114;;
115;; $nhash_track_keys_bit         "T" in the diagram above
116;;                               Sign bit of the longword at $nhash.vector.flags
117;;                               or the byte at $nhash.vector-track-keys-byte.
118;;                               If set, GC tracks relocation of keys in the
119;;                               vector.
120;; $nhash_key_moved_bit          "K" in the diagram above
121;;                               Set by GC to indicate that a key moved.
122;;                               If $nhash_track_keys_bit is clear, this bit is set to
123;;                               indicate that any GC will require a rehash.
124;;                               GC never clears this bit, but may set it if
125;;                               $nhash_track_keys_bit is set.
126;; $nhash_component_address_bit  "C" in the diagram above.
127;;                               Ignored by GC.  Set to indicate that the
128;;                               address of a component of a key was used.
129;;                               Means that $nhash_track_keys_bit will
130;;                               never be set until all such keys are
131;;                               removed.
132;; $nhash_weak_bit               "W" in the diagram above
133;;                               Sign bit of the byte at $nhash.vector-weak-byte
134;;                               Set to indicate a weak hash table
135;; $nhash_weak_value_bit         "V" in the diagram above
136;;                               If clear, the table is weak on key
137;;                               If set, the table is weak on value
138;; $nhash_finalizeable_bit       "F" in the diagram above
139;;                               If set the table is finalizeable:
140;;                               If any key/value pairs are removed, they will be added to
141;;                               the nhash.vector.finalization-alist using cons cells
142;;                               from nhash.vector.free-alist
143;; $nhash_keys_frozen_bit       "Z" in diagram above.
144;;                               If set, GC will remove weak entries by setting the
145;;                               value to (%slot-unbound-marker), leaving key unchanged.
146
147(in-package "CCL")
148
149
150(eval-when (:compile-toplevel :execute)
151  (require "HASHENV" "ccl:xdump;hashenv"))
152
153(defvar *hash-table-class*
154  (progn
155;    #+sparc-target (dbg)
156    (find-class 'hash-table)))
157
158(setf (type-predicate 'hash-table) 'hash-table-p)
159
160
161(defmethod print-object ((table hash-table) stream)
162  (print-unreadable-object (table stream :type t :identity t)
163    (format stream "~S ~S size ~D/~D"
164            ':test (hash-table-test table)
165            (hash-table-count table)
166            (hash-table-size table))
167    (when (readonly-hash-table-p table)
168      (format stream " (Readonly)"))))
169
170
171#+vaporware
172;;; Of course, the lisp version of this would be too slow ...
173(defun hash-table-finalization-list (hash-table)
174  (unless (hash-table-p hash-table)
175    (report-bad-arg hash-table 'hash-table))
176  (let* ((vector (nhash.vector hash-table))
177         (flags (nhash.vector.flags vector)))
178    (declare (fixnum flags))
179    (if (logbitp $nhash_finalizeable_bit flags)
180      (nhash.vector.finalization-alist vector)
181      (error "~S is not a finalizeable hash table" hash-table))))
182
183#+vaporware
184(defun (setf hash-table-finalization-list) (value hash-table)
185  (unless (hash-table-p hash-table)
186    (report-bad-arg hash-table 'hash-table))
187  (let* ((vector (nhash.vector hash-table))
188         (flags (nhash.vector.flags vector)))
189    (declare (fixnum flags))
190    (if (logbitp $nhash_finalizeable_bit flags)
191      (setf (nhash.vector.finalization-alist vector) value)
192      (error "~S is not a finalizeable hash table" hash-table))))
193
194(defsetf gethash puthash)
195
196; Returns nil, :key or :value
197(defun hash-table-weak-p (hash)
198  (unless (hash-table-p hash)
199    (setq hash (require-type hash 'hash-table)))
200  (let* ((vector (nhash.vector hash))
201         (flags (nhash.vector.flags vector)))
202    (when (logbitp $nhash_weak_bit flags)
203      (if (logbitp $nhash_weak_value_bit flags)
204        :value
205        :key))))
206
207;;; It would be pretty complicated to offer a way of doing (SETF
208;;; HASH-TABLE-WEAK-P) after the hash-table's been created, and
209;;; it's not clear that that'd be incredibly useful.
210
211
212
213;;;;;;;;;;;;;
214;;
215;; Mapping functions
216;;
217
218
219
220(defun next-hash-table-iteration-1 (state)
221  (do* ((index (nhti.index state) (1+ index))
222        (keys (nhti.keys state))
223        (values (nhti.values state))
224        (nkeys (nhti.nkeys state)))
225       ((>= index nkeys)
226        (setf (nhti.index state) nkeys)
227        nil)
228    (declare (fixnum index nkeys)
229             (simple-vector keys))
230    (let* ((key (svref keys index))
231           (value (svref values index)))
232        (setf (nhti.index state) (1+ index))
233        (return (values t key value)))))
234
235
236
237(defun maphash (function hash-table)
238  "For each entry in HASH-TABLE, call the designated two-argument function
239   on the key and value of the entry. Return NIL."
240  (with-hash-table-iterator (m hash-table)
241    (loop
242      (multiple-value-bind (found key value) (m)
243        (unless found (return))
244        (funcall function key value)))))
245
246
247
248(defmethod make-load-form ((hash hash-table) &optional env)
249  (declare (ignore env))
250  (%normalize-hash-table-count hash)
251  (let ((keytransF (nhash.keytransF hash))
252        (compareF (nhash.compareF hash))
253        (vector (nhash.vector hash))
254        (private (if (nhash.owner hash) '*current-process*))
255        (lock-free-p (logtest $nhash.lock-free (the fixnum (nhash.lock hash)))))
256    (flet ((convert (f)
257             (if (or (fixnump f) (symbolp f))
258               `',f
259               `(symbol-function ',(function-name f)))))
260      (values
261       `(%cons-hash-table
262         nil nil nil ,(nhash.grow-threshold hash) ,(nhash.rehash-ratio hash) ,(nhash.rehash-size hash)
263        nil nil ,private ,lock-free-p)
264       `(%initialize-hash-table ,hash ,(convert keytransF) ,(convert compareF) ',vector)))))
265
266(defun needs-rehashing (hash)
267  (%set-needs-rehashing hash))
268
269(defun %initialize-hash-table (hash keytransF compareF vector)
270  (setf (nhash.keytransF hash) keytransF
271        (nhash.compareF hash) compareF)
272  (setf (nhash.find hash)
273        (case comparef
274          (0 #'eq-hash-find)
275          (-1 #'eql-hash-find)
276          (t #'general-hash-find))
277        (nhash.find-new hash)
278        (case comparef
279          (0 #'eq-hash-find-for-put)
280          (-1 #'eql-hash-find-for-put)
281          (t #'general-hash-find-for-put)))
282  (setf (nhash.vector hash) vector)
283  (%set-needs-rehashing hash))
284
285
286;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
287;;
288;; Support for locking hash tables while fasdumping
289;;
290
291
292(defun fasl-lock-hash-table (hash-table)
293  (setq hash-table (require-type hash-table 'hash-table))
294  (without-interrupts
295   (let* ((lock (nhash.exclusion-lock hash-table)))
296     (if lock
297       (progn
298         (if (hash-lock-free-p hash-table)
299           ;; For lock-free hash tables, this only makes sure nobody is
300           ;; rehashing the table.  It doesn't necessarily stop readers
301           ;; or writers (unless they need to rehash).
302           (grab-lock lock)
303           (write-lock-rwlock lock))
304         (push hash-table *fcomp-locked-hash-tables*))
305       (unless (eq (nhash.owner hash-table) *current-process*)
306         (error "Current process doesn't own hash-table ~s" hash-table))))))
307
308(defun fasl-unlock-hash-tables ()
309  (dolist (h *fcomp-locked-hash-tables*)
310    (let* ((lock (nhash.exclusion-lock h)))
311      (if (hash-lock-free-p h)
312        (release-lock lock)
313        (unlock-rwlock lock)))))
314
315
316
317             
318
319#+not-yet
320(progn
321;;;;;;;;;;;;;
322;;
323;; Replacement for population
324;;
325(def-accessors (weak-table) %svref
326  nil                                   ; 'weak-table
327  weak-table.vector                     ; a $v_nhash vector
328  weak-table.index                      ; index for next entry
329  weak-table.grow-threshold             ; number of entries left in vector
330  )
331
332(defun make-weak-table (&optional (size 20))
333  (%istruct 'weak-table
334            (%cons-nhash-vector
335             size (+ (ash 1 $nhash_weak_bit)))
336            0
337            size))
338
339(defun weak-table-p (weak-table)
340  (istruct-typep weak-table 'weak-table))
341
342(setf (type-predicate 'weak-table) 'weak-table-p)
343
344(defun weak-table-count (weak-table)
345  (setq weak-table (require-type weak-table 'weak-table))
346  (- (weak-table.index weak-table)
347     (nhash.vector.weak-deletions-count (weak-table.vector weak-table))))
348
349(defun weak-table-push (key weak-table &optional value)
350  (setq weak-table (require-type weak-table 'weak-table))
351  (let ((thresh (weak-table.grow-threshold weak-table))
352        (vector (weak-table.vector weak-table))
353        (index (weak-table.index weak-table)))
354    (declare (fixnum thresh index))
355    (if (> thresh 0)
356      (progn
357        (lap-inline (index)
358          (:variable vector key value)
359          (move.l (varg vector) atemp0)
360          (lea (atemp0 arg_z.l $nhash_data) atemp0)
361          (move.l (varg key) atemp0@+)
362          (move.l (varg value) @atemp0))
363        (setf (weak-table.index weak-table) (the fixnum (1+ index))
364              (weak-table.grow-threshold weak-table) (the fixnum (1- thresh)))
365        value)
366      (let ((deletions (nhash.vector.weak-deletions-count vector)))
367        (declare (fixnum deletions))
368        (if (> deletions 0)
369          ; GC deleted some entries, we can compact the table
370          (progn
371            (lap-inline (index)
372              (:variable vector)
373              (getint arg_z)            ; length
374              (move.l (varg vector) atemp0)
375              (lea (atemp0 $nhash_data) atemp0)
376              (move.l atemp0 atemp1)
377              (move.l ($ $undefined) da)
378              ; Find the first deleted entry
379              (dbfloop.l arg_z
380                (if# (ne (cmp.l @atemp0 da))
381                  (add.l ($ 1) arg_z)
382                  (bra @move))
383                (add.w ($ 8) atemp0))
384              ; copy the rest of the table up
385              @move
386              (dbfloop.l arg_z
387                (move.l atemp0@+ db)
388                (if# (eq (cmp.l db da))
389                  (add.w ($ 4) atemp0)
390                 else#
391                  (move.l db atemp1@+)
392                  (move.l atemp0@+ atemp1@+)))
393              ; Write over the newly emptied part of the table
394              (while# (ne (cmp.l atemp0 atemp1))
395                (move.l da @atemp1)
396                (add.l ($ 8) atemp1)))
397            (setf (nhash.vector.weak-deletions-count vector) 0
398                  (weak-table.index weak-table) (the fixnum (- index deletions))
399                  (weak-table.grow-threshold weak-table) (the fixnum (+ thresh deletions)))
400            (weak-table-push key weak-table value))
401          ; table is full.  Grow it by a factor of 1.5
402          (let* ((new-size (+ index (the fixnum (ash (the fixnum (1+ index)) -1))))
403                 (new-vector (%cons-nhash-vector new-size (ash 1 $nhash_weak_bit))))
404            (declare (fixnum new-size))
405            (lap-inline (index)
406              (:variable vector new-vector count)
407              (move.l (varg vector) atemp0)
408              (move.l (varg new-vector) atemp1)
409              (lea (atemp0 $nhash_data) atemp0)
410              (lea (atemp1 $nhash_data) atemp1)
411              (getint arg_z)            ; table length
412              (dbfloop.l arg_z
413                (move.l atemp0@+ atemp1@+)
414                (move.l atemp0@+ atemp1@+)))
415            (setf (weak-table.vector weak-table) new-vector
416                  (weak-table.grow-threshold weak-table) (the fixnum (- new-size index)))
417            ; It's possible that GC deleted some entries while consing the new vector
418            (setf (nhash.vector.weak-deletions-count new-vector)
419                  (nhash.vector.weak-deletions-count vector))
420            (weak-table-push key weak-table value)))))))
421
422; function gets two args: key & value
423(defun map-weak-table (function weak-table)
424  (setq weak-table (require-type weak-table 'weak-table))
425  (let* ((vector (weak-table.vector weak-table))
426         (index (weak-table.index weak-table))
427         (flags (nhash.vector.flags vector)))
428    (unwind-protect
429      (progn
430        (setf (nhash.vector.flags vector) 0)    ; disable deletion by GC
431        (lap-inline ()
432          (:variable function vector index)
433          (while# (gt (move.l (varg index) da))
434            (sub.l '1 da)
435            (move.l da (varg index))
436            (move.l (varg vector) atemp0)
437            (move.l (atemp0 da.l $nhash_data) arg_y)
438            (if# (ne (cmp.w ($ $undefined) arg_y))
439              (move.l (atemp0 da.l (+ $nhash_data 4)) arg_z)
440              (set_nargs 2)
441              (move.l (varg function) atemp0)
442              (jsr_subprim $sp-funcall))))
443        nil)
444      (setf (nhash.vector.flags vector) flags))))
445
446; function gets one arg, the key
447(defun map-weak-table-keys (function weak-table)
448  (flet ((f (key value)
449           (declare (ignore value))
450           (funcall function key)))
451    (declare (dynamic-extent #'f))
452    (map-weak-table #'f weak-table)))
453   
454) ; #+not-yet
455
456; end
Note: See TracBrowser for help on using the repository browser.