source: branches/portable/btrees.lisp@ 31

Last change on this file since 31 was 20, checked in by wws, 10 years ago

Btrees appear to work.

Reimplemented ccl::%copy-ivector-to-ivector to work around a bug.

  • Property svn:eol-style set to native
File size: 94.5 KB
Line 
1;;;-*- Mode: Lisp; Package: WOOD -*-
2
3;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4;;;
5;;; btrees.lisp
6;;; B* trees with variable length keys for pheaps.
7;;;
8;;; Copyright © 1996-1997 Digitool, Inc.
9;;; Copyright © 1992-1995 Apple Computer, Inc.
10;;; All rights reserved.
11;;; Permission is given to use, copy, and modify this software provided
12;;; that Digitool is given credit in all derivative works.
13;;; This software is provided "as is". Digitool makes no warranty or
14;;; representation, either express or implied, with respect to this software,
15;;; its quality, accuracy, merchantability, or fitness for a particular
16;;; purpose.
17;;;
18
19;;; Key size is limited to 127 bytes with longer keys
20;;; being stored as strings (and requiring an extra disk access).
21;;; (longer strings are not yet implemented).
22
23;;;;;;;;;;;;;;;;;;;;;;;;;;
24;;;
25;;; Modification History
26;;;
27;;; 03/26/97 bill check-btree-consistency avoids divide by zero errors.
28;;; ------------- 0.96
29;;; 08/27/96 bill Wrap an eval-when around the definition of *enable-debug-break* as
30;;; suggested by Rainer Joswig.
31;;; ------------- 0.95
32;;; 05/13/96 bill mis-parenthesization of compare-string in %btree-search-node
33;;; ------------- 0.94 = MCL-PPC 3.9
34;;; 03/26/96 bill Don't use %schar in %btree-search-node, MCL-PPC type checks it.
35;;; 03/21/96 bill Specify element-type of base-character to make-string calls
36;;; Add an optional swap-space-in-k arg to time-btree-store & time-btree-read
37;;; ------------- 0.93
38;;; 08/10/95 bill p-do-btree macro is to a btree what dolist is to a list.
39;;; 07/02/95 bill Fix fencepost in computation of fill-count in %shift-node-left
40;;; Thanks to Horace Enea for providing the test case to track down this bug.
41;;; 05/25/95 bill New parameter: *max-btree-node-size*.
42;;; dc-cons-btree-node calls dc-allocate-new-btree-node instead of
43;;; %dc-allocate-new-memory.
44;;; New function: dc-allocate-new-btree-node, calls %dc-allocate-new-memory
45;;; as before if the page size is <= *max-btree-node-size*. Otherwise, carves
46;;; up a disk page into *max-btree-node-size* nodes, returns one of them, and
47;;; adds the rest to the btree node free list.
48;;; 04/11/95 bill new function: p-map-btree-keystrings.
49;;; dc-map-btree uses compare-strings-function instead of string<
50;;; for comparing with the TO arg.
51;;; ------------- 0.9
52;;; 02/06/95 bill Moon's idea to add binary search to %btree-search-node without
53;;; changing the disk representation. That and doing the comparisons
54;;; inline got speed improvement of a factor of 2.7 over the old linear
55;;; search code.
56;;; 12/16/94 bill Complete rewrite of the low-level insertion code.
57;;; This fixes a bug found by Paul.Meurer@mi.uib.no and
58;;; will make much easier the addition of binary search for nodes.
59;;; 10/28/94 Moon Change without-interrupts to with-databases-locked.
60;;; 10/25/94 bill (setf (p-btree-store ...) x) now returns x instead of its pptr.
61;;; 09/21/94 bill without-interrupts as necessary for interlocking
62;;; ------------- 0.8
63;;; 07/31/93 bill %btree-insert-in-inner-node now goes out of line to
64;;; %attempt-to-shift-left-inner-node & %attempt-to-shift-right-inner-node.
65;;; %attempt-to-shift-right-inner-node is new code. I thought
66;;; that I could get away with leaving it out, but I was wrong.
67;;; 03/29/93 bill *forwarded-btree* in btree-find-leaf-node & dc-map-btree
68;;; 03/25/93 bill %btree-split-inner-node - wrong page on one of the accessing-byte-array's
69;;; Also, neglected to update last-middle-size when the parent entry went at the
70;;; end of the new middle node. In this case, some of the copying was also extraneous.
71;;; -------------- 0.6
72;;; -------------- 0.5
73;;; 07/28/92 bill make p-map-btree deal correctly with insertion or
74;;; deletion while mapping.
75;;; 07/27/92 bill p-clear-btree, p-map-btree
76;;; 06/30/92 bill little bug in %split-btree-root
77;;; 06/26/92 bill btree vector indices defs -> woodequ
78;;; 06/23/92 bill Don't ignore type in p-make-btree
79;;; -------------- 0.1
80;;;
81
82;;;;;;;;;;;;;;;;;;;;;;;;;;
83;;;
84;;; To do:
85;;;
86;;; 1) Maybe.
87;;; Replace the $btree_parent slot with $btree_mod-count for locking use.
88;;; Updating parents at shift or split time is too expensive.
89;;; Instead, pass around an ancestors list (stack-consed).
90;;;
91;;; 2) Implement keys longer than 127 bytes.
92
93(in-package :wood)
94
95(export '(p-make-btree p-make-string-equal-btree
96 p-btree-lookup p-btree-store p-btree-delete
97 dc-make-btree dc-btree-lookup dc-btree-store dc-btree-delete))
98
99;;;;;;;;;;;;;;;;;;;;;;;;;;
100;;;
101;;; btree vector - subtype $v_btree
102;;;
103;;; This code belongs in woodequ.lisp
104;;;
105
106; So this will work as a patch file
107(eval-when (:compile-toplevel :execute :load-toplevel)
108
109(unless (boundp '$btree.max-key-size)
110
111(makunbound '$btree-size)
112
113; Defined here so that this file can be distributed as a patch
114; Real definition is in "woodequ.lisp"
115(def-indices
116 $btree.root ; the root node
117 $btree.count ; number of leaf entries
118 $btree.depth ; 0 means only the root exists
119 $btree.nodes ; total number of nodes
120 $btree.first-leaf ; first leaf node. A constant
121 $btree.type ; type bits
122 $btree.max-key-size ; maximum size of a key
123 $btree-size ; length of a $v_btree vector
124 )
125
126)) ; End unless & compile-when
127
128; This file is being distributed before the with-databases-locked macro
129; is defined in "block-io-mcl.lisp"
130(eval-when (:compile-toplevel :execute :load-toplevel)
131
132(unless (fboundp 'with-databases-locked)
133 (defmacro with-databases-locked (&body body)
134 `(progn ,@body)))
135
136)
137
138;;;;;;;;;;;;;;;;;;;;;;;;;;
139;;;
140;;; Node layout - subtype $v_btree-node
141;;;
142;;; -------------------
143;;; | $vector-header |
144;;; | subtype length |
145;;; |-------------------|
146;;; | link |
147;;; | parent |
148;;; | used free |
149;;; | count flags |
150;;; | pointer[0] |
151;;; | len[0] key[0] ... |
152;;; | pointer[1] |
153;;; | len[1] key[1] ... |
154;;; | ... |
155;;; | pointer[m] |
156;;; | len[m] key[m] ... |
157;;; | pointer[m+1] |
158;;; -------------------
159
160;;; $vector-header is the standard vector header marker
161;;; subtype is one byte, it's value is $v_btree-node
162;;; length is the total length of the data portion of the block in bytes
163;;; link is used by the GC so that it can walk btree nodes last.
164;;; parent points at the parent node of this one, or at the btree
165;;; uvector for the root.
166;;; used is 16 bits: the number of bytes that are in use at $btree_data
167;;; free is 16 bits: the number of free bytes at the end of the block.
168;;; count is 16 bits: the number of entries in this node
169;;; flags is 16 bits of flags.
170;;; Bit 0 is set for a leaf page.
171;;; Bit 1 is set for the root page.
172;;; pointer[i] is 4 bytes aligned on a 4-byte boundary.
173;;; For a leaf node, it points at the indexed data.
174;;; For a non-leaf node, it points at another node in the tree
175;;; This node is the branch of the tree containing all entries whose
176;;; keys are <= key[i].
177;;; pointer[m+1] for a leaf node points to the next leaf node.
178;;; This makes a linked list of all the leaf nodes,
179;;; which is useful for mapping over the entries.
180;;; pointer[m+1] for a non-leaf node points at the branch of the tree
181;;; containing entries whose keys are > key[m]
182;;; len[i] is a byte giving the length of key[i]
183;;; if len[i] is 255, then there are three unused bytes followed
184;;; by a four byte pointer to a string containing the key.
185;;; otherwise, len[i] will always be < 128
186;;; (keys longer than 127 bytes are not yet implemented)
187;;; key[i] is len[i] bytes of characters for the key followed
188;;; by enough padding bytes to get to the next 4-byte boundary.
189
190(defconstant $btree_link $v_data)
191(defconstant $btree_parent (+ $btree_link 4))
192(defconstant $btree_used (+ $btree_parent 4))
193(defconstant $btree_free (+ $btree_used 2))
194(defconstant $btree_count (+ $btree_free 2))
195(defconstant $btree_flags (+ $btree_count 2))
196(defconstant $btree_data (+ $btree_flags 2))
197
198(defconstant $btree_flags.leaf-bit 0)
199(defconstant $btree_flags.root-bit 1)
200
201(defmacro with-simple-string ((var string) &body body)
202 (let ((thunk (gensym)))
203 `(let ((,thunk #'(lambda (,var) ,@body)))
204 (declare (dynamic-extent ,thunk))
205 (funcall-with-simple-string ,string ,thunk))))
206
207(defun funcall-with-simple-string (string thunk)
208 (if (simple-string-p string)
209 (funcall thunk string)
210 (let* ((len (length string))
211 (simple-string (make-string len)))
212 (declare (dynamic-extent simple-string))
213 (multiple-value-bind (str offset) (array-data-and-offset string)
214 (wood::%load-string str offset len simple-string))
215 (funcall thunk simple-string))))
216
217; Bound by the garbage collector. We won't type-check this
218; btree as it's subtype has been overwritten with a forwarding pointer.
219(defvar *forwarded-btree* nil)
220
221;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
222;;;
223;;; The documented interface
224;;;
225
226(defun p-make-btree (pheap &key area type)
227 (pptr pheap
228 (dc-make-btree (pheap-disk-cache pheap)
229 (and area (pheap-pptr-pointer area pheap))
230 (and type (require-type type 'fixnum)))))
231
232(defun p-make-string-equal-btree (pheap &key area)
233 (p-make-btree pheap :area area :type $btree-type_string-equal))
234
235(defun p-btree-lookup (btree key-string &optional default)
236 (let ((pheap (pptr-pheap btree)))
237 (multiple-value-bind (pointer immediate? found?)
238 (dc-btree-lookup
239 (pheap-disk-cache pheap)
240 (pptr-pointer btree)
241 (if (stringp key-string)
242 key-string
243 (p-load key-string)))
244 (if found?
245 (values
246 (if immediate?
247 pointer
248 (pptr pheap pointer))
249 t)
250 default))))
251
252(defun p-btree-store (btree key-string default &optional (value default))
253 (let ((pheap (pptr-pheap btree)))
254 (multiple-value-bind (pointer immediate?)
255 (%p-store pheap value)
256 (dc-btree-store
257 (pheap-disk-cache pheap)
258 (pptr-pointer btree)
259 (if (stringp key-string)
260 key-string
261 (p-load key-string))
262 pointer
263 immediate?)
264 (if immediate?
265 pointer
266 (pptr pheap pointer)))))
267
268(defun setf-p-btree-lookup (btree key-string default &optional (value default))
269 (p-btree-store btree key-string default value)
270 value)
271
272(defsetf p-btree-lookup setf-p-btree-lookup)
273
274(defun p-btree-delete (btree key-string)
275 (dc-btree-delete
276 (pptr-disk-cache btree)
277 (pptr-pointer btree)
278 (if (stringp key-string)
279 key-string
280 (p-load key-string))))
281
282(defun p-clear-btree (btree)
283 (dc-clear-btree (pptr-disk-cache btree)
284 (pptr-pointer btree))
285 btree)
286
287(defmacro p-do-btree ((key value) (btree &optional from to) &body body)
288 (let ((mapper (gensym)))
289 `(let ((,mapper #'(lambda (,key ,value) ,@body)))
290 (declare (dynamic-extent ,mapper))
291 ,(if (or from to)
292 `(p-map-btree ,btree ,mapper ,from ,to)
293 `(p-map-btree ,btree ,mapper)))))
294
295(defun p-map-btree (btree function &optional from to)
296 (let* ((pheap (pptr-pheap btree))
297 (f #'(lambda (disk-cache key value imm?)
298 (declare (ignore disk-cache))
299 (funcall function key (if imm? value (pptr pheap value))))))
300 (declare (dynamic-extent f))
301 (dc-map-btree (pheap-disk-cache pheap)
302 (pptr-pointer btree)
303 f
304 (if (or (null from) (stringp from))
305 from
306 (p-load from))
307 (if (or (null to) (stringp to))
308 to
309 (p-load to)))))
310
311(defun p-map-btree-keystrings (btree function &optional from to)
312 (let* ((pheap (pptr-pheap btree))
313 (f #'(lambda (disk-cache key value imm?)
314 (declare (ignore disk-cache value imm?))
315 (funcall function key))))
316 (declare (dynamic-extent f))
317 (dc-map-btree (pheap-disk-cache pheap)
318 (pptr-pointer btree)
319 f
320 (if (or (null from) (stringp from))
321 from
322 (p-load from))
323 (if (or (null to) (stringp to))
324 to
325 (p-load to)))))
326
327;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
328;;;
329;;; disk-cache versions of the documented interface
330;;;
331
332(defun dc-make-btree (disk-cache &optional area type)
333 (with-databases-locked
334 (let* ((btree (dc-make-uvector disk-cache $btree-size $v_btree area 0 t))
335 (root (dc-cons-btree-node
336 disk-cache btree btree
337 (logior (ash 1 $btree_flags.leaf-bit) (ash 1 $btree_flags.root-bit)))))
338 (accessing-disk-cache (disk-cache)
339 (svset.p btree $btree.root root)
340 (svset.p btree $btree.first-leaf root)
341 (when type
342 (svset.p btree $btree.type (require-type type 'fixnum) t))
343 (svset.p btree $btree.max-key-size 0))
344 btree)))
345
346(defun dc-btree-lookup (disk-cache btree key-string)
347 (with-databases-locked
348 (with-simple-string (key-string key-string)
349 (multiple-value-bind (node offset eq)
350 (btree-find-leaf-node disk-cache btree key-string)
351 (when eq
352 (multiple-value-bind (pointer immediate?)
353 (read-pointer disk-cache (+ node offset))
354 (values pointer immediate? t)))))))
355
356(defun dc-btree-store (disk-cache btree key-string value &optional
357 value-imm?)
358 (with-databases-locked
359 (with-simple-string (key-string key-string)
360 (when (> (length key-string) 127)
361 #-gz(error "Keys longer than 127 bytes not supported yet.")
362 #+gz(warn "Keys longer than 127 bytes not supported yet.")
363 #+gz (setq key-string (subseq key-string 0 127)))
364 (multiple-value-bind (node offset eq)
365 (btree-find-leaf-node disk-cache btree key-string)
366 (if eq
367 (setf (read-pointer disk-cache (+ node offset) value-imm?)
368 value)
369 (progn
370 (%btree-insert-in-node
371 disk-cache btree node offset key-string value value-imm?)
372 (accessing-disk-cache (disk-cache)
373 (svset.p btree $btree.count (1+ (svref.p btree $btree.count)) t))
374 (values value value-imm?)))))))
375
376(defun dc-btree-delete (disk-cache btree key-string)
377 (with-databases-locked
378 (with-simple-string (key-string key-string)
379 (if (> (length key-string) 127)
380 (error "Keys longer than 127 bytes not supported yet."))
381 (multiple-value-bind (node offset eq)
382 (btree-find-leaf-node disk-cache btree key-string)
383 (when eq
384 (%btree-delete-from-node disk-cache btree node offset t)
385 (accessing-disk-cache (disk-cache)
386 (svset.p btree $btree.count (1- (svref.p btree $btree.count)) t))
387 t)))))
388
389(defun dc-clear-btree (disk-cache btree)
390 (require-satisfies dc-vector-subtype-p disk-cache btree $v_btree)
391 (let (root-node first-leaf)
392 (with-databases-locked
393 (setq root-node (dc-%svref disk-cache btree $btree.root)
394 first-leaf (dc-%svref disk-cache btree $btree.first-leaf))
395 (accessing-disk-cache (disk-cache first-leaf)
396 (multiple-value-bind (node used free) (init-btree-node disk-cache first-leaf)
397 (declare (ignore node))
398 (fill.b (+ $btree_data used) 0 free))
399 (store.l btree $btree_parent)
400 (store.w (logior (ash 1 $btree_flags.root-bit)
401 (ash 1 $btree_flags.leaf-bit)
402 (load.w $btree_flags))
403 $btree_flags))
404 (dc-%svfill disk-cache btree
405 $btree.root first-leaf
406 ($btree.count t) 0
407 ($btree.count t) 0
408 ($btree.nodes t) 1)
409 (when (> (dc-uvsize disk-cache btree) $btree.max-key-size) ; backward compatibility
410 (setf (dc-%svref disk-cache btree $btree.max-key-size t) 0)))
411 ; Eventually, rewrite dc-%clear-node to occasionally allow interrupts.
412 (with-databases-locked
413 (dc-%clear-node disk-cache root-node first-leaf)))
414 btree)
415
416(defun dc-%clear-node (disk-cache node first-leaf)
417 (require-satisfies dc-vector-subtype-p disk-cache node $v_btree-node)
418 (unless (eql node first-leaf)
419 (with-locked-page (disk-cache node nil buf offset)
420 (accessing-byte-array (buf offset)
421 (unless (logbitp $btree_flags.leaf-bit (load.w $btree_flags))
422 (let ((p $btree_data))
423 (declare (fixnum p))
424 (dotimes (i (load.w $btree_count))
425 (dc-%clear-node disk-cache (load.l p) first-leaf)
426 (incf p 4)
427 (incf p (normalize-size (1+ (load.b p)) 4)))
428 (dc-%clear-node disk-cache (load.l p) first-leaf)))
429 (dc-free-btree-node disk-cache nil node)))))
430
431; This assumes that actions at event-processing level don't change the
432; B-tree (i.e. the may read the database but not write it), so we can
433; release the database lock around the call to the user function.
434; Otherwise the database would have to be locked across the entire operation.
435; with-databases-locked is required around btree-find-leaf-node, because that
436; function doesn't call with-databases-locked itself, and also because the
437; with-locked-page below has a timing hazard (buf could be reused before
438; the page has been locked) unless it is called inside with-databases-locked.
439(defun dc-map-btree (disk-cache btree function &optional from to)
440 (if (null from)
441 (unless (eql btree *forwarded-btree*)
442 (require-satisfies dc-vector-subtype-p disk-cache btree $v_btree))
443 (unless (stringp from)
444 (setq from (require-type from '(or null string)))))
445 (unless (or (null to) (stringp to))
446 (setq to (require-type to '(or null string))))
447 (with-databases-locked
448 (multiple-value-bind (node p)
449 (if from
450 (btree-find-leaf-node disk-cache btree from)
451 (values (dc-%svref disk-cache btree $btree.first-leaf)
452 $btree_data))
453 (declare (fixnum p))
454 (let ((string-equal-function (string-equal-function disk-cache btree))
455 (compare-strings-function (and to (compare-strings-function disk-cache btree))))
456 (loop
457 (block once-per-node
458 (with-locked-page (disk-cache node nil buf buf-offset)
459 (accessing-byte-array (buf buf-offset)
460 (let ((max-p (+ $btree_data (load.w $btree_used) -4)))
461 (declare (fixnum max-p))
462 (loop
463 (when (>= p max-p)
464 (when (> p max-p)
465 (error "Inconsistency: pointer off end of btree node"))
466 (return))
467 (multiple-value-bind (value imm?) (load.p p)
468 (let* ((len (load.b (incf p 4)))
469 (key (make-string len)))
470 (declare (fixnum len)
471 (dynamic-extent key))
472 (load.string (the fixnum (1+ p)) len key)
473 (when (and to (< (the fixnum (funcall compare-strings-function to key)) 0))
474 (return-from dc-map-btree nil))
475 (with-databases-unlocked
476 (funcall function disk-cache key value imm?))
477 (let ((newlen (load.b p)))
478 (declare (fixnum newlen))
479 (unless (and (eql newlen len)
480 (let ((new-key (make-string newlen)))
481 (declare (dynamic-extent new-key))
482 (load.string (the fixnum (1+ p)) newlen new-key)
483 (funcall string-equal-function key new-key)))
484 ; The user inserted or deleted something that caused
485 ; the key to move. Need to find it again.
486 (let (eq)
487 (multiple-value-setq (node p eq)
488 (btree-find-leaf-node disk-cache btree key))
489 (when eq
490 (incf p (normalize-size (1+ len) 4)))
491 (return-from once-per-node))))
492 (incf p (normalize-size (1+ len) 4))))))
493 (setq node (load.l p)
494 p $btree_data)
495 (when (eql node $pheap-nil)
496 (return nil))))))))))
497
498;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
499;;;
500;;; Grungy internal details
501;;; First, some generally useful utility functions
502;;;
503
504(defparameter *max-btree-node-size* 512)
505
506(defun dc-cons-btree-node (disk-cache btree parent flags)
507 (let ((node (or (with-locked-page (disk-cache (+ $root-vector $v_data
508 (* 4 $pheap.btree-free-list)))
509 (accessing-disk-cache (disk-cache)
510 (let ((free-list (svref.p $root-vector $pheap.btree-free-list)))
511 (unless (eql $pheap-nil free-list)
512 (svset.p $root-vector $pheap.btree-free-list
513 (accessing-disk-cache (disk-cache free-list)
514 (load.l $btree_parent)))
515 free-list))))
516 (dc-allocate-new-btree-node disk-cache))))
517 (accessing-disk-cache (disk-cache node)
518 (store.l parent $btree_parent)
519 (store.w flags $btree_flags))
520 (with-locked-page (disk-cache (+ btree $v_data (* 4 $btree.nodes)) t)
521 (accessing-disk-cache (disk-cache)
522 (svset.p btree $btree.nodes (1+ (svref.p btree $btree.nodes)) t)))
523 node))
524
525; This takes care of chopping up a disk block into btree nodes.
526; This is necessary if the page size is bigger than the *max-btree-node-size*.
527; Otherwise, we just use one disk page.
528(defun dc-allocate-new-btree-node (disk-cache)
529 (let ((node (if (>= *max-btree-node-size* (disk-cache-page-size disk-cache))
530 (%dc-allocate-new-memory disk-cache 1 $v_btree-node 0 t)
531 ; Could do most of this size computation at pheap open time and cache the
532 ; results, but it's in the noise compared to everything else.
533 (let* ((segment (%dc-allocate-new-memory disk-cache 1 $v_segment nil))
534 (segment-size (dc-%vector-size disk-cache segment))
535 (segment-header-bytes (* 4 $segment-header-size))
536 (bytes (- segment-size segment-header-bytes))
537 (bytes-per-node *max-btree-node-size*)
538 (count (ceiling bytes bytes-per-node))
539 (size-per-node (- bytes-per-node $vector-header-size))
540 (first-node-bytes (- bytes (* (1- count) bytes-per-node)))
541 (first-node-size (- first-node-bytes $vector-header-size))
542 (ptr (+ segment $v_data segment-header-bytes))
543 res)
544 (dc-%svfill disk-cache segment
545 $segment.area $pheap-nil
546 $segment.header $pheap-nil)
547 (dotimes (i count)
548 (if (eql i 0)
549 (progn
550 (setq res (initialize-vector-storage
551 disk-cache ptr first-node-size $v_btree-node 1 0 t))
552 (incf ptr first-node-bytes))
553 (progn
554 (dc-free-btree-node
555 disk-cache nil
556 (initialize-vector-storage
557 disk-cache ptr size-per-node $v_btree-node 1 nil))
558 (incf ptr bytes-per-node))))
559 (unless (eql (+ segment $v_data segment-size) ptr)
560 (error "ptr not at segment end"))
561 res))))
562 (init-btree-node disk-cache node)))
563
564(defun dc-free-btree-node (disk-cache btree node)
565 (multiple-value-bind (node used free) (init-btree-node disk-cache node)
566 (accessing-disk-cache (disk-cache node)
567 (fill.b (+ $btree_data used) 0 free)))
568 (with-locked-page (disk-cache $root-vector t)
569 (accessing-disk-cache (disk-cache)
570 (let ((free-list (svref.p $root-vector $pheap.btree-free-list)))
571 (accessing-disk-cache (disk-cache node)
572 (store.l free-list $btree_parent)))
573 (svset.p $root-vector $pheap.btree-free-list node)))
574 (when btree
575 (with-locked-page (disk-cache (+ btree $v_data (* 4 $btree.nodes)) t)
576 (accessing-disk-cache (disk-cache)
577 (svset.p btree $btree.nodes (1- (svref.p btree $btree.nodes)) t)))))
578
579(defun init-btree-node (disk-cache node)
580 (accessing-disk-cache (disk-cache node)
581 (let* ((vector-size (%vector-size.p node))
582 (data-size (- vector-size (- $btree_data $v_data)))
583 (used 4)
584 (free (- data-size used)))
585 (store.l $pheap-nil $btree_link)
586 (store.w used $btree_used)
587 (store.w free $btree_free)
588 (store.w 0 $btree_count)
589 (store.l $pheap-nil $btree_data)
590 (values node used free))))
591
592(defun %btree-leaf-node-p (disk-cache node)
593 (accessing-disk-cache (disk-cache node)
594 (logbitp $btree_flags.leaf-bit (load.w $btree_flags))))
595
596(defun %btree-root-node-p (disk-cache node)
597 (accessing-disk-cache (disk-cache node)
598 (logbitp $btree_flags.root-bit (load.w $btree_flags))))
599
600(defun compare-strings-function (disk-cache btree)
601 (if (logbitp $btree-type_string-equal-bit
602 (dc-%svref disk-cache btree $btree.type))
603 #'compare-strings-case-insensitive
604 #'compare-strings))
605
606; New function
607(defun compare-strings-case-insensitive (str1 str2)
608 (cond ((string-lessp str1 str2) -1)
609 ((string-equal str1 str2) 0)
610 (t 1)))
611
612; New function
613(defun string-equal-function (disk-cache btree)
614 (if (logbitp $btree-type_string-equal-bit
615 (dc-%svref disk-cache btree $btree.type))
616 #'string-equal
617 #'string=))
618
619; Returns two values:
620; 1) offset - from node for the place where key-string goes
621; 2) eq - True if the key at this offset is key-string
622(defun btree-find-leaf-node (disk-cache btree key-string)
623 (unless (eql btree *forwarded-btree*)
624 (require-satisfies dc-vector-subtype-p disk-cache btree $v_btree))
625 (let ((node (dc-%svref disk-cache btree $btree.root))
626 (case-sensitive? (not (logbitp $btree-type_string-equal-bit
627 (dc-%svref disk-cache btree $btree.type)))))
628 (loop
629 (multiple-value-bind (offset eq)
630 (%btree-search-node
631 disk-cache node key-string case-sensitive?)
632 (when (%btree-leaf-node-p disk-cache node)
633 (return (values node offset eq)))
634 (setq node (read-long disk-cache (+ node offset)))
635 (require-satisfies dc-vector-subtype-p
636 disk-cache node $v_btree-node)))))
637
638; This one calls the disk-cache code directly and accesses the
639; page vector itself so that it can be reasonably fast.
640; Returns same two values as btree-find-leaf-node
641; plus a third value which is the offset to the node just to the left of the found one.
642
643
644#| ; old linear search code
645(defun %btree-search-node (disk-cache node key-string case-sensitive?)
646 (let ((compare-strings-function (if case-sensitive? #'compare-strings #'compare-strings-case-insensitive)))
647 (with-locked-page (disk-cache node nil vec offset bytes)
648 (declare (fixnum offset bytes))
649 (accessing-byte-array (vec offset)
650 (let* ((end (+ offset $btree_data (load.uw $btree_used)))
651 (ptr (+ offset $btree_data 4))
652 (last-ptr nil))
653 (declare (fixnum end ptr))
654 (declare (fixnum offset bytes))
655 (unless (>= (the fixnum (+ offset bytes)) end)
656 (error "End of btree node is past end of disk page"))
657 (loop
658 (if (>= ptr end)
659 (return (values (- ptr offset 4)
660 nil
661 (if last-ptr (- last-ptr offset 4)))))
662 (let* ((len (aref vec ptr))
663 (str (make-string len)))
664 (declare (dynamic-extent str))
665 (%copy-byte-array-portion vec (the fixnum (1+ ptr)) len str 0)
666 ; Here's where we'll eventually use part of the
667 ; $btree_flags to select a sorting predicate.
668 (let ((compare (funcall compare-strings-function key-string str)))
669 (declare (fixnum compare))
670 (when (<= compare 0)
671 (return (values (- ptr offset 4)
672 (eql compare 0)
673 (if last-ptr (- last-ptr offset 4))))))
674 (setq last-ptr ptr)
675 (incf ptr (normalize-size (+ 5 len) 4)))))))))
676|#
677
678#+GZ
679(defun check-used-counts (where disk-cache node)
680 (let* ((used (read-unsigned-word disk-cache (+ node $btree_used)))
681 (count (read-unsigned-word disk-cache (+ node $btree_count)))
682 (bytes (loop for n = 0 then (+ n (normalize-size (+ 5 len) 4))
683 for i from 0 below count
684 as len = (read-8-bits disk-cache (+ node $btree_data n 4))
685 finally return n)))
686 ;(FORMAT T "~&CHECK-USED-COUNTS@~s node ~s: [count]=~s, [used]=~s, bytes ~s"
687 ; where node count used bytes)
688 where
689 (assert (<= bytes used))))
690
691
692
693; New binary search code: Moon's idea.
694(defun %btree-search-node (disk-cache node key-string case-sensitive?)
695 (with-locked-page (disk-cache node nil vec offset)
696 (declare (fixnum offset)
697 (type (simple-array (unsigned-byte 8) (*)) vec))
698 (accessing-byte-array (vec)
699
700 (let* ((count (load.uw (+ $btree_count offset)))
701 (min 0) ; inclusive lower bound
702 (max count) ; exclusive upper bound
703 (ptrs (make-array count))
704 (lens (make-array count))
705 (fill-pointer 0) ; unlike Common Lisp, this is an inclusive upper bound
706 (key-len (length key-string))
707 (offset+4 (+ offset 4))
708 (offset+5 (+ offset 5)))
709 (declare (fixnum count min max fill-pointer offset+4 offset+5)
710 (dynamic-extent ptrs lens)
711 (simple-vector ptrs lens)
712 (optimize (speed 3) (safety 0)))
713 (when (eql count 0)
714 (return-from %btree-search-node $btree_data))
715 (setf (svref ptrs 0) $btree_data
716 (svref lens 0) (aref vec (the fixnum (+ $btree_data offset+4)))) ; (load.b (+ $btree_data offset 4))
717 (flet ((get-ptr (index)
718 (declare (fixnum index))
719 (if (<= index fill-pointer)
720 (values (svref ptrs index) (svref lens index))
721 (let ((p (svref ptrs fill-pointer))
722 (len (svref lens fill-pointer)))
723 (declare (fixnum p len))
724 (loop
725 (incf p (the fixnum (normalize-size (the fixnum (+ 5 len)) 4)))
726 (setq len (aref vec (the fixnum (+ p offset+4)))) ; (load.b (+ p 4 offset))
727 (setf (svref ptrs (incf fill-pointer)) p)
728 (setf (svref lens fill-pointer) len)
729 (when (eql fill-pointer index)
730 (return (values p len)))))))
731 (compare-strings (s1 i1 end1 s2 i2 end2 case-sensitive?)
732 (declare (fixnum i1 end1 i2 end2)
733 (type simple-string s1)
734 (type (simple-array (unsigned-byte 8) (*)) s2)
735 (optimize (speed 3) (safety 0)))
736 ; s1 is a simple-string and s2 is a (simple-array (unsigned-byte 8) (*))
737 (if case-sensitive?
738 (loop
739 (when (>= i1 end1)
740 (return (if (eql i2 end2) 0 -1)))
741 (when (>= i2 end2) (return 1))
742 (let ((c1 (char-code (aref s1 i1)))
743 (c2 (aref s2 i2)))
744 (declare (fixnum c1 c2))
745 (if (<= c1 c2)
746 (if (< c1 c2)
747 (return -1))
748 (return 1)))
749 (incf i1)
750 (incf i2))
751 (loop
752 (when (>= i1 end1)
753 (return (if (eql i2 end2) 0 -1)))
754 (when (>= i2 end2) (return 1))
755 (let ((c1 (%char-code (char-upcase (aref s1 i1))))
756 (c2 (%char-code (char-upcase (%code-char (aref s2 i2))))))
757 (declare (fixnum c1 c2))
758 (if (<= c1 c2)
759 (if (< c1 c2)
760 (return -1))
761 (return 1)))
762 (incf i1)
763 (incf i2)))))
764 (declare (inline get-ptr compare-strings))
765 (loop
766 (let ((index (ash (the fixnum (+ min max)) -1)))
767 (declare (fixnum index))
768 (multiple-value-bind (ptr len) (get-ptr index)
769 (declare (fixnum ptr len))
770 (let* ((vec-idx (+ ptr (the fixnum offset+5)))
771 (vec-end (+ vec-idx len))
772 (compare (compare-strings key-string 0 key-len vec vec-idx vec-end case-sensitive?)))
773 (declare (fixnum vec-idx vec-end compare))
774 (if (<= compare 0)
775 (progn
776 (setq max index)
777 (when (or (eql compare 0) (eql min max))
778 (return (values ptr
779 (eql compare 0)
780 (unless (eql index 0)
781 (svref ptrs (the fixnum (1- index))))))))
782 (progn
783 (setq min (1+ index))
784 (when (eql min max)
785 (return (values (the fixnum (+ ptr (normalize-size (the fixnum (+ 5 len)) 4)))
786 nil
787 ptr))))))))))))))
788
789(defun compare-strings (str1 str2)
790 (cond ((string< str1 str2) -1)
791 ((string= str1 str2) 0)
792 (t 1)))
793
794; Search a node for a pointer to a subnode.
795; Return two values, the offset for the subnode, and the offset
796; for the subnode just before it.
797; If right-node-p is true, return a third value, the offset of the subnode
798; just after subnode.
799(defun %btree-search-for-subnode (disk-cache node subnode &optional right-node-p)
800 (with-locked-page (disk-cache node nil vec offset bytes)
801 (declare (fixnum offset bytes))
802 (accessing-byte-array (vec offset)
803 (let* ((end (+ offset $btree_data (load.uw $btree_used)))
804 (ptr (+ offset $btree_data))
805 (last-ptr nil))
806 (declare (fixnum end ptr))
807 (declare (fixnum offset bytes))
808 (unless (>= (the fixnum (+ offset bytes)) end)
809 (error "End of btree node is past end of disk page"))
810 (accessing-byte-array (vec)
811 (loop
812 (when (eql subnode (load.p ptr))
813 (return (values (- ptr offset)
814 (if last-ptr (- last-ptr offset))
815 (when right-node-p
816 (let ((right-ptr (+ ptr 4)))
817 (declare (fixnum right-ptr))
818 (unless (>= right-ptr end)
819 (incf right-ptr (normalize-size (1+ (load.b right-ptr)) 4))
820 (- right-ptr offset)))))))
821 (setq last-ptr ptr)
822 (incf ptr 4)
823 (if (>= ptr end)
824 (return nil))
825 (incf ptr (normalize-size (1+ (load.b ptr)) 4))))))))
826
827; Fill the SIZES array with the sizes of the entries in NODE.
828; If one of the entries is at INSERT-OFFSET, put INSERT-SIZE
829; into SIZES at that index, and return the index.
830; Otherwise, return NIL.
831(defun %lookup-node-sizes (disk-cache node sizes count &optional insert-offset insert-size
832 (start 0))
833 (accessing-disk-cache (disk-cache node)
834 (unless count
835 (setq count (load.uw $btree_count)))
836 (when insert-offset (incf count))
837 (let ((p (+ $btree_data 4))
838 (p-at-offset (and insert-offset (+ insert-offset 4)))
839 insert-index
840 (index (require-type start 'fixnum)))
841 (declare (fixnum p))
842 (dotimes (i count)
843 (if (eql p p-at-offset)
844 (setf (aref sizes index) insert-size
845 insert-index index
846 p-at-offset nil)
847 (incf p (setf (aref sizes index) (normalize-size (+ 5 (load.b p)) 4))))
848 (incf index))
849 (when (and insert-offset (null insert-index))
850 (error "Inconsistency: didn't find insert-offset"))
851 (unless (eql p (+ $btree_data (load.uw $btree_used)))
852 (error "Inconsistency: walking node's entries didn't end up at end"))
853 insert-index)))
854
855; When we move entries around in a non-leaf nodes, the parent pointers
856; need to be updated.
857; This will go away if I eliminate the parent pointers and replace
858; them with passing around the ancestor list.
859; Doing this will make insertion and deletion slightly faster
860; at the expense of making it hard to click around in a btree
861; in the inspector.
862(defun %btree-update-childrens-parents (disk-cache node &optional start-ptr end-ptr)
863 (with-locked-page (disk-cache node nil node-buf node-buf-offset)
864 (accessing-byte-array (node-buf)
865 (let* ((used (load.uw (+ node-buf-offset $btree_used)))
866 (p (or start-ptr (+ node-buf-offset $btree_data)))
867 (max-p (or end-ptr (+ node-buf-offset $btree_data used)))
868 child)
869 (declare (fixnum p max-p))
870 (loop
871 (setq child (load.p p))
872 (require-satisfies dc-vector-subtype-p disk-cache child $v_btree-node)
873 (accessing-disk-cache (disk-cache child)
874 (store.p node $btree_parent))
875 (incf p 4)
876 (when (>= p max-p)
877 (unless (eql p max-p)
878 (error "Inconsistency. Node scan went past expected end."))
879 (return))
880 (incf p (normalize-size (+ 1 (load.b p)) 4)))))))
881
882; Update and return the maximum key size
883(defun dc-btree-max-key-size (disk-cache btree &optional new-size)
884 (when new-size
885 (setq new-size (require-type new-size 'fixnum)))
886 (if (<= (dc-uvsize disk-cache btree) $btree.max-key-size)
887 132 ; old btrees don't track max key size
888 (let ((size (dc-%svref-fixnum disk-cache btree $btree.max-key-size "$btree.max-key-size")))
889 (if (and new-size (> new-size size))
890 (setf (dc-%svref disk-cache btree $btree.max-key-size t) new-size)
891 size))))
892
893;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
894;;
895;; Here's where the guts of an insert happens.
896;; We know that the key-string belongs at offset from node.
897;; Insert it there if it fits.
898;; Otherwise split this node into two after creating room in each
899;; node in the access path.
900;;
901
902(defun %btree-insert-in-node (disk-cache btree node offset key-string value
903 &optional value-imm? (key-length (length key-string)))
904 (accessing-disk-cache (disk-cache node)
905 (let* ((free (load.uw $btree_free))
906 (used (load.uw $btree_used))
907 (size (normalize-size (+ 5 key-length) 4))
908 (max-key-size (dc-btree-max-key-size disk-cache btree size))) ; update & get max key size
909 (declare (fixnum free used size))
910 (if (> key-length 127)
911 (error "Keys longer than 127 not supported yet."))
912 (when (<= size free)
913 ; Will fit in this node
914 (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
915 (let* ((bytes-to-move (- used (- offset $btree_data)))
916 (p (+ node-buf-offset offset)))
917 (declare (fixnum bytes-to-move p))
918 (%copy-byte-array-portion node-buf p bytes-to-move
919 node-buf (+ p size) node-page)
920 (%store-btree-entry
921 node-buf p node-page
922 key-string key-length value value-imm? size)
923 (accessing-byte-array (node-buf node-buf-offset node-page)
924 (store.w (1+ (load.uw $btree_count)) $btree_count)
925 (store.w (+ used size) $btree_used)
926 (store.w (- free size) $btree_free))))
927 (return-from %btree-insert-in-node nil))
928 ; Won't fit. Split the node
929 (unless (%btree-leaf-node-p disk-cache node)
930 (error "Wasn't room to insert in non-leaf node"))
931 (%create-insertion-path disk-cache btree key-string max-key-size)
932 (%split-node disk-cache btree node free used
933 t offset key-string value value-imm? key-length size)
934 ; This slightly increases non-leaf node space utilization, but
935 ; at a 10% time penalty, so I've nuked it.
936 ;(%sew-up-insertion-path disk-cache btree node t)
937 )))
938
939(eval-when (:load-toplevel :compile-toplevel :execute)
940
941(defvar *enable-debug-break* nil)
942
943) ; end of eval-when
944
945(defmacro debug-break (format-string &rest format-args)
946 (when *enable-debug-break*
947 `(%debug-break ,format-string ,@format-args)))
948
949(defun %debug-break (format-string &rest format-args)
950 (when *enable-debug-break*
951 (let ((*print-base* 16.))
952 (apply 'cerror "Continue" format-string format-args))))
953
954; There is an important difference between the leaf and non-leaf nodes.
955; When entries are shifted between leaf nodes, the parent node does not
956; need to be referenced except to update it with the last key in the
957; left-hand leaf node. When non-leaf nodes are shifted, the shifting needs
958; to go through the parent node, i.e. the key in the parent that is between
959; the keys in the left child and the right child does not appear in either child
960; and needs to move to one of them. After we're done, the parent will have
961; a new key that used to be in one of its children but isn't any more.
962
963(defun %split-node (disk-cache btree node free used leaf-p &optional
964 offset key-string value value-imm? key-length (size 0))
965 (when (%shift-node-left disk-cache btree node free used leaf-p
966 offset key-string value value-imm? key-length size)
967 (return-from %split-node :shift-left))
968 (when (%shift-node-right disk-cache btree node free used leaf-p
969 offset key-string value value-imm? key-length size)
970 (return-from %split-node :shift-right))
971 (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
972 (accessing-byte-array (node-buf node-buf-offset node-page)
973 (let* ((parent (load.p $btree_parent))
974 (old-count (load.uw $btree_count))
975 (count (if offset (1+ old-count) old-count))
976 (sizes (make-array count))
977 (insert-index (%lookup-node-sizes disk-cache node sizes old-count offset size))
978 (new-used (+ used size))
979 (last-new-used 0)
980 (new-count 0)
981 (last-size 0)
982 (right-node (dc-cons-btree-node
983 disk-cache btree parent (if leaf-p (ash 1 $btree_flags.leaf-bit) 0)))
984 (right-used 4)
985 (last-right-used 0)
986 (right-count 0)
987 (last-last-size 0))
988 (declare (fixnum old-count count new-used new-count last-size right-used right-count
989 last-new-used last-right-used last-last-size)
990 (dynamic-extent sizes))
991 (loop for i from (1- count) downto 0
992 finally (error "Didn't find a split point")
993 do
994 (setq last-new-used new-used
995 last-right-used right-used
996 last-last-size last-size)
997 (if leaf-p
998 (progn
999 (incf right-used (setq last-size (aref sizes i)))
1000 (decf new-used last-size))
1001 (progn
1002 (incf right-used last-last-size)
1003 (decf new-used (setq last-size (aref sizes i)))))
1004 (when (>= right-used new-used)
1005 (setq new-count i)
1006 (let ((diff (- right-used new-used))
1007 (old-diff (- last-new-used last-right-used)))
1008 (when (> diff old-diff)
1009 (setq new-used last-new-used
1010 right-used last-right-used
1011 new-count (1+ i))))
1012 (setq last-size (aref sizes (1- new-count)))
1013 (return)))
1014 (debug-break "after figuring where to put split")
1015 (with-locked-page (disk-cache right-node t right-buf right-offset nil right-page)
1016 (setq right-count (- count new-count (if leaf-p 0 1)))
1017 (let* ((insert-middle (if leaf-p
1018 (eql insert-index (1- new-count))
1019 (eql insert-index new-count)))
1020 (insert-left (and insert-index (< insert-index new-count)))
1021 (insert-not-right (or (not insert-index) insert-middle insert-left))
1022 (end-ptr (+ node-buf-offset $btree_data new-used))
1023 (node-ptr (- end-ptr
1024 (if insert-left size 0)
1025 (if leaf-p last-size 0)))
1026 (last-string-length (if insert-middle
1027 key-length
1028 (accessing-byte-array (node-buf) (load.b node-ptr))))
1029 (last-string (make-string last-string-length)))
1030 (declare (fixnum end-ptr node-ptr last-string-length)
1031 (dynamic-extent last-string))
1032 (debug-break "About to fill last-string")
1033 (if insert-middle
1034 (setq last-string key-string)
1035 (%copy-byte-array-portion node-buf (1+ node-ptr) last-string-length
1036 last-string 0))
1037 (if insert-not-right
1038 ; New entry goes in node or there is no new entry & this is a non-leaf node
1039 (let ((bytes-to-shift 0))
1040 (declare (fixnum bytes-to-shift))
1041 (setq node-ptr (- (+ node-buf-offset $btree_data used) right-used))
1042 (debug-break "Before first copy-byte-array-portion")
1043 (%copy-byte-array-portion
1044 node-buf node-ptr right-used right-buf (+ right-offset $btree_data) right-page)
1045 (when insert-left
1046 (setq node-ptr (+ node-buf-offset offset)
1047 bytes-to-shift (- new-used (- offset $btree_data) size))
1048 (debug-break "About to open up node")
1049 (%copy-byte-array-portion
1050 node-buf node-ptr bytes-to-shift node-buf (+ node-ptr size))
1051 (%store-btree-entry node-buf node-ptr node-page
1052 key-string key-length value value-imm? size)))
1053 ; New entry goes in right-node. leaf-p is true.
1054 (let* ((bytes-to-copy (- right-used size))
1055 (bytes-before-offset (- offset $btree_data (- new-used 4)))
1056 (bytes-after-offset (- bytes-to-copy bytes-before-offset))
1057 (right-ptr (+ right-offset $btree_data)))
1058 (declare (fixnum bytes-to-copy bytes-before-offset bytes-after-offset right-ptr))
1059 (setq node-ptr (- (+ node-buf-offset $btree_data used) bytes-to-copy))
1060 (debug-break "Before first copy-byte-array-portion when insert goes right")
1061 (%copy-byte-array-portion
1062 node-buf node-ptr bytes-before-offset right-buf right-ptr right-page)
1063 (incf node-ptr bytes-before-offset)
1064 (incf right-ptr bytes-before-offset)
1065 (%store-btree-entry right-buf right-ptr right-page
1066 key-string key-length value value-imm? size)
1067 (incf right-ptr size)
1068 (%copy-byte-array-portion
1069 node-buf node-ptr bytes-after-offset right-buf right-ptr right-page)))
1070 (accessing-byte-array (node-buf nil node-page)
1071 (let ((parent-offset (%btree-search-for-subnode disk-cache parent node)))
1072 (unless parent-offset
1073 (error "Couldn't find node ~s in parent node ~s" node parent))
1074 (setq node-ptr (+ node-buf-offset $btree_data new-used -4))
1075 (debug-break "About to fix up node")
1076 (if leaf-p
1077 (store.p right-node node-ptr)
1078 (when insert-middle
1079 (store.p value node-ptr value-imm?)))
1080 (incf node-ptr 4)
1081 (accessing-disk-cache (disk-cache parent)
1082 (store.p right-node parent-offset))
1083 (fill.b node-ptr 0 (- used new-used))
1084 (debug-break "About to insert in parent node")
1085 (%btree-insert-in-node
1086 disk-cache btree parent parent-offset last-string node nil last-string-length))))
1087 (accessing-byte-array (node-buf node-buf-offset node-page)
1088 (store.w new-count $btree_count)
1089 (store.w (- (+ free used) new-used) $btree_free)
1090 (store.w new-used $btree_used))
1091 (accessing-byte-array (right-buf right-offset right-page)
1092 (store.w right-count $btree_count)
1093 (let ((total (+ (load.uw $btree_used) (load.uw $btree_free))))
1094 (store.w (- total right-used) $btree_free)
1095 (store.w right-used $btree_used)))
1096 (unless leaf-p
1097 (%btree-update-childrens-parents disk-cache right-node))
1098 (debug-break "Done with %split-node")))
1099 :split)))
1100
1101; Attempt to shift the node left enough to make room for the new key-string
1102; Works for non-leaf-nodes as well
1103; Non-leaf nodes are harder since the shifting has to go through the parent.
1104; This function exists because without it the space utilization is only 51%.
1105; There is guaranteed (due to %create-insertion-path) to be enough room in the parent.
1106; Returns true if it succeeded.
1107; Otherwise, makes no changes and returns nil.
1108(defun %shift-node-left (disk-cache btree node free used leaf-p
1109 offset key-string value value-imm? key-length size)
1110 ;(let ((*enable-debug-break* (not leaf-p)))
1111 (unless offset
1112 ; If we're not inserting, we need to make room for the maximum node size
1113 (setq size (dc-btree-max-key-size disk-cache btree)))
1114 (unless (%btree-root-node-p disk-cache node)
1115 (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
1116 (declare (fixnum node-buf-offset))
1117 (accessing-byte-array (node-buf node-buf-offset node-page)
1118 (let* ((parent (load.p $btree_parent)))
1119 (multiple-value-bind (node-offset left-offset)
1120 (%btree-search-for-subnode disk-cache parent node)
1121 (when left-offset
1122 (let* ((left-node (accessing-disk-cache (disk-cache parent) (load.p left-offset)))
1123 (left-free (accessing-disk-cache (disk-cache left-node) (load.uw $btree_free)))
1124 (new-left-free left-free)
1125 (left-used (accessing-disk-cache (disk-cache left-node) (load.uw $btree_used)))
1126 (new-left-used left-used)
1127 (new-free free)
1128 (new-used used)
1129 (count (load.uw $btree_count))
1130 (sizes (make-array (the fixnum (1+ count))))
1131 (bytes-moved 0)
1132 (bytes-received 0)
1133 (bytes-needed (- size free))
1134 (bytes-to-offset (if offset (- offset $btree_data) most-positive-fixnum))
1135 (count-diff 0)
1136 (last-shifted-entry-size 0)
1137 (initial-parent-size (- node-offset left-offset))
1138 (parent-size initial-parent-size)
1139 (new-parent-size (if leaf-p 0 parent-size)))
1140 (declare (fixnum left-free new-left-free left-used new-left-used
1141 free new-free used new-used count
1142 bytes-moved bytes-needed bytes-to-offset
1143 count-diff last-shifted-entry-size
1144 initial-parent-size parent-size new-parent-size)
1145 (dynamic-extent sizes))
1146 (%lookup-node-sizes disk-cache node sizes count)
1147 (with-locked-page (disk-cache left-node t left-buf left-buf-offset nil left-page)
1148 (declare (fixnum left-buf-offset))
1149 (with-locked-page (disk-cache parent t parent-buf parent-buf-offset nil parent-page)
1150 (declare (fixnum parent-buf-offset))
1151 (labels ((shift-left (new-entry-in-node)
1152 (decf new-used bytes-moved)
1153 (incf new-free bytes-moved)
1154 (incf new-left-used bytes-received)
1155 (decf new-left-free bytes-received)
1156 (let* ((node-ptr (+ node-buf-offset $btree_data))
1157 (left-ptr (+ left-buf-offset $btree_data left-used -4))
1158 (bytes-to-last-string (+ (- bytes-moved last-shifted-entry-size) 4))
1159 (bytes-to-copy (if (or leaf-p (not new-entry-in-node))
1160 bytes-moved
1161 bytes-to-last-string)))
1162 (declare (fixnum node-ptr left-ptr bytes-to-last-string bytes-to-copy))
1163 (debug-break "About to copy bytes left")
1164 (unless leaf-p
1165 (let ((parent-ptr (+ left-offset parent-buf-offset 4))
1166 (parent-bytes (- parent-size 4)))
1167 (declare (fixnum parent-ptr parent-bytes))
1168 (incf left-ptr 4)
1169 (%copy-byte-array-portion parent-buf parent-ptr parent-bytes
1170 left-buf left-ptr left-page)
1171 (incf left-ptr parent-bytes)))
1172 (%copy-byte-array-portion node-buf node-ptr bytes-to-copy
1173 left-buf left-ptr left-page)
1174 (incf left-ptr bytes-to-copy)
1175 (when leaf-p
1176 (accessing-byte-array (left-buf nil left-page)
1177 (store.p node left-ptr)))
1178 (if new-entry-in-node
1179 (let ((node-ptr (+ node-ptr bytes-to-last-string))
1180 (parent-ptr (open-parent last-shifted-entry-size)))
1181 (declare (fixnum node-ptr parent-ptr))
1182 (debug-break "About to copy node to parent")
1183 (%copy-byte-array-portion node-buf node-ptr (- last-shifted-entry-size 4)
1184 parent-buf parent-ptr parent-page))
1185 (let* ((parent-ptr (open-parent size))
1186 (key-size (1+ key-length))
1187 (fill-count (- (normalize-size key-size 4) key-size)))
1188 (declare (fixnum parent-ptr key-size fill-count))
1189 (debug-break "About to enter key-string in parent")
1190 (accessing-byte-array (parent-buf 0 parent-page)
1191 (store.b key-length parent-ptr))
1192 (incf parent-ptr)
1193 (%copy-byte-array-portion key-string 0 key-length
1194 parent-buf parent-ptr parent-page)
1195 (incf parent-ptr key-length)
1196 (unless (eql 0 fill-count)
1197 (accessing-byte-array (parent-buf 0 parent-page)
1198 (fill.b parent-ptr 0 fill-count)))))
1199 (debug-break "About to shift node contents left")
1200 (%copy-byte-array-portion node-buf (+ node-ptr bytes-moved) new-used
1201 node-buf node-ptr node-page)
1202 (incf node-ptr new-used)
1203 (accessing-byte-array (node-buf nil node-page)
1204 (fill.b node-ptr 0 bytes-moved))
1205 (debug-break "Exiting shift-left")
1206 ))
1207 (update-free-and-used ()
1208 (accessing-byte-array (node-buf node-buf-offset node-page)
1209 (store.w new-used $btree_used)
1210 (store.w new-free $btree_free)
1211 (store.w (- (load.uw $btree_count) count-diff) $btree_count))
1212 (accessing-byte-array (left-buf left-buf-offset left-page)
1213 (store.w new-left-used $btree_used)
1214 (store.w new-left-free $btree_free)
1215 (store.w (+ (load.uw $btree_count) count-diff) $btree_count))
1216 (unless leaf-p
1217 (%btree-update-childrens-parents
1218 disk-cache left-node (+ left-buf-offset $btree_data left-used initial-parent-size -4)))
1219 (debug-break "Free and used updated")
1220 )
1221 (open-parent (size)
1222 (accessing-byte-array (parent-buf parent-buf-offset parent-page)
1223 (let* ((old-size (- node-offset left-offset))
1224 (size-diff (- old-size size))
1225 (parent-used (load.uw $btree_used))
1226 (parent-free (load.uw $btree_free))
1227 (ptr (+ parent-buf-offset node-offset))
1228 (bytes-to-move (- parent-used (- node-offset $btree_data))))
1229 (declare (fixnum old-size size-diff parent-used parent-free
1230 ptr bytes-to-move))
1231 (unless (eql size-diff 0)
1232 (debug-break "About to shift parent tail")
1233 (%copy-byte-array-portion
1234 parent-buf ptr bytes-to-move
1235 parent-buf (- ptr size-diff) parent-page)
1236 (when (> size-diff 0)
1237 (incf ptr (- bytes-to-move size-diff))
1238 (accessing-byte-array (parent-buf nil parent-page)
1239 (fill.b ptr 0 size-diff)))
1240 (store.w (- parent-used size-diff) $btree_used)
1241 (store.w (+ parent-free size-diff) $btree_free))
1242 (+ parent-buf-offset left-offset 4)))))
1243 (declare (dynamic-extent #'shift-left #'update-free-and-used
1244 #'open-parent))
1245 (dotimes (i count (error "Didn't run over offset"))
1246 (when (>= bytes-received left-free)
1247 ; Ran out of room in left node
1248 (return nil))
1249 (when (>= bytes-moved bytes-needed)
1250 ; The new entry now fits in node
1251 (setq count-diff i)
1252 (shift-left t)
1253 (update-free-and-used)
1254 (when offset
1255 (%btree-insert-in-node disk-cache btree node (- offset bytes-moved)
1256 key-string value value-imm? key-length))
1257 (debug-break "Done with insertion in node")
1258 ; #+bill (check-btree-consistency disk-cache btree)
1259 (return t))
1260 (when (>= bytes-moved bytes-to-offset)
1261 (unless (eql bytes-moved bytes-to-offset)
1262 (error "Inconsistency: offset was not at an entry boundary"))
1263 (unless (or (not leaf-p) (>= new-left-free (+ size bytes-received)))
1264 (debug-break "Couldn't shift left")
1265 (return nil))
1266 ; The new entry fits at the end of left-node
1267 (setq count-diff i)
1268 (shift-left nil)
1269 (when leaf-p
1270 (let ((left-ptr (+ left-buf-offset $btree_data new-left-used -4)))
1271 (declare (fixnum left-ptr))
1272 (debug-break "Storing new entry in left neighbor")
1273 (%store-btree-entry left-buf left-ptr left-page
1274 key-string key-length value value-imm? size)
1275 (incf left-ptr size)
1276 (accessing-byte-array (left-buf)
1277 (store.p node left-ptr))
1278 (incf new-left-used size)
1279 (decf new-left-free size)
1280 (accessing-byte-array (left-buf left-buf-offset)
1281 (store.w (1+ (load.uw $btree_count)) $btree_count))))
1282 (update-free-and-used)
1283 (debug-break "Done with insertion in left neighbor")
1284 ;#+bill (check-btree-consistency disk-cache btree)
1285 (return t))
1286 (setq last-shifted-entry-size (aref sizes i))
1287 (if leaf-p
1288 (progn
1289 (incf bytes-moved last-shifted-entry-size)
1290 (incf bytes-received last-shifted-entry-size))
1291 (progn
1292 (incf bytes-moved last-shifted-entry-size)
1293 (incf bytes-received new-parent-size)
1294 (setq new-parent-size last-shifted-entry-size)))))))))))))))
1295
1296
1297; Attempt to shift the node right enough to make room for the new key-string
1298; This is necessary because inserting in reverse order foils %shift-node-left
1299; This doesn't handle the non-leaf case yet. It's hardly worth it.
1300(defun %shift-node-right (disk-cache btree node free used leaf-p
1301 offset key-string value value-imm? key-length size)
1302 (declare (fixnum free used))
1303 ;(return-from %shift-node-right nil) ; not yet debugged.
1304 (when (and leaf-p (not (%btree-root-node-p disk-cache node)))
1305 (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
1306 (accessing-byte-array (node-buf node-buf-offset node-page)
1307 (let* ((parent (load.p $btree_parent)))
1308 (multiple-value-bind (node-offset left-offset right-offset)
1309 (%btree-search-for-subnode disk-cache parent node t)
1310 (declare (ignore left-offset))
1311 (when right-offset
1312 (let* ((right-node (accessing-disk-cache (disk-cache parent) (load.p right-offset)))
1313 (right-free (accessing-disk-cache (disk-cache right-node) (load.uw $btree_free)))
1314 (new-right-free right-free)
1315 (right-used (accessing-disk-cache (disk-cache right-node) (load.uw $btree_used)))
1316 (new-right-used right-used)
1317 (new-free free)
1318 (new-used used)
1319 (count (load.uw $btree_count))
1320 (sizes (make-array (the fixnum (1+ count))))
1321 (bytes-moved 0)
1322 (bytes-needed (- size free))
1323 (bytes-to-offset (- used (- offset $btree_data) 4))
1324 (count-diff 0))
1325 (declare (fixnum right-offset right-free new-right-free
1326 right-used new-right-used new-free new-used count
1327 bytes-moved bytes-needed bytes-to-offset
1328 count-diff)
1329 (dynamic-extent sizes))
1330 (%lookup-node-sizes disk-cache node sizes count)
1331 (with-locked-page (disk-cache right-node t right-buf right-buf-offset nil right-page)
1332 (flet ((shift-right (&optional (new-entry-size 0))
1333 (decf new-used bytes-moved)
1334 (incf new-free bytes-moved)
1335 (incf new-right-used bytes-moved)
1336 (decf new-right-free bytes-moved)
1337 (let ((node-ptr (+ node-buf-offset $btree_data new-used -4))
1338 (right-ptr (+ right-buf-offset $btree_data)))
1339 (declare (fixnum node-ptr right-ptr))
1340 (debug-break "About to copy bytes right")
1341 (%copy-byte-array-portion right-buf right-ptr right-used
1342 right-buf
1343 (+ right-ptr new-entry-size bytes-moved)
1344 right-page)
1345 (incf right-ptr new-entry-size)
1346 (%copy-byte-array-portion node-buf node-ptr bytes-moved
1347 right-buf right-ptr right-page)
1348 (accessing-byte-array (node-buf nil node-page)
1349 (store.p right-node node-ptr))
1350 (incf node-ptr 4)
1351 (accessing-byte-array (node-buf nil node-page)
1352 (fill.b node-ptr 0 bytes-moved))
1353 (debug-break "Exiting shift-right")
1354 ))
1355 (update-free-and-used ()
1356 (accessing-byte-array (node-buf node-buf-offset node-page)
1357 (store.w new-used $btree_used)
1358 (store.w new-free $btree_free)
1359 (store.w (- (load.uw $btree_count) count-diff) $btree_count))
1360 (accessing-byte-array (right-buf right-buf-offset right-page)
1361 (store.w new-right-used $btree_used)
1362 (store.w new-right-free $btree_free)
1363 (store.w (+ (load.uw $btree_count) count-diff) $btree_count))
1364 (debug-break "Free and used updated")
1365 )
1366 (replace-parent-entry (i &optional string (size (aref sizes i)))
1367 (declare (fixnum size))
1368 (with-locked-page (disk-cache parent t parent-buf parent-buf-offset nil parent-page)
1369 (accessing-byte-array (parent-buf parent-buf-offset parent-page)
1370 (let* ((last-entry-ptr (+ node-buf-offset $btree_data
1371 (- new-used size)))
1372 (length (if string
1373 (length string)
1374 (accessing-byte-array (node-buf)
1375 (load.b last-entry-ptr))))
1376 (temp-string (make-string length)))
1377 (declare (fixnum last-entry-ptr length)
1378 (dynamic-extent temp-string))
1379 (unless string
1380 (debug-break "Filling string")
1381 (setq string temp-string)
1382 (%copy-byte-array-portion node-buf (1+ last-entry-ptr) length
1383 string 0))
1384 (let* ((old-size (- right-offset node-offset))
1385 (size-diff (- old-size size))
1386 (ptr (+ parent-buf-offset right-offset)))
1387 (declare (fixnum old-size size-diff ptr))
1388 (unless (eql size-diff 0)
1389 (let* ((parent-used (load.uw $btree_used))
1390 (parent-free (load.uw $btree_free))
1391 (bytes-to-move (- parent-used (- right-offset $btree_data))))
1392 (declare (fixnum parent-used parent-free bytes-to-move))
1393 (debug-break "About to shift parent tail")
1394 (%copy-byte-array-portion
1395 parent-buf ptr bytes-to-move
1396 parent-buf (- ptr size-diff) parent-page)
1397 (when (> size-diff 0)
1398 (incf ptr (- bytes-to-move size-diff))
1399 (accessing-byte-array (parent-buf nil parent-page)
1400 (fill.b ptr 0 size-diff)))
1401 (store.w (- parent-used size-diff) $btree_used)
1402 (store.w (+ parent-free size-diff) $btree_free)))
1403 (setq ptr (+ parent-buf-offset node-offset))
1404 (debug-break "About to enter node string in parent")
1405 (%store-btree-entry parent-buf ptr parent-page
1406 string length (load.p node-offset) nil size)))))))
1407 (declare (dynamic-extent #'shift-right #'update-free-and-used
1408 #'replace-parent-entry))
1409 (loop for i from (1- count) downto 0 do
1410 (when (>= bytes-moved right-free)
1411 ; Ran out of room in right node
1412 (return nil))
1413 (when (>= bytes-moved bytes-needed)
1414 ; The new entry now fits in node
1415 (setq count-diff (- count i 1))
1416 (shift-right)
1417 (update-free-and-used)
1418 (if (eql (- offset $btree_data) (- new-used 4))
1419 (replace-parent-entry i key-string size)
1420 (replace-parent-entry i))
1421 (%btree-insert-in-node disk-cache btree node offset
1422 key-string value value-imm? key-length)
1423 (debug-break "Done with insertion in node")
1424 ; #+bill (check-btree-consistency disk-cache btree)
1425 (return t))
1426 (when (>= bytes-moved bytes-to-offset)
1427 (unless (eql bytes-moved bytes-to-offset)
1428 (error "Inconsistency: offset was not at an entry boundary"))
1429 (unless (>= new-right-free (+ size bytes-moved))
1430 (debug-break "Couldn't shift right")
1431 (return nil))
1432 ; The new entry fits at the beginning of right-node
1433 (setq count-diff (- count i 1))
1434 (shift-right size)
1435 (let ((right-ptr (+ right-buf-offset $btree_data)))
1436 (declare (fixnum right-ptr))
1437 (debug-break "Storing new entry in right neighbor")
1438 (%store-btree-entry right-buf right-ptr right-page
1439 key-string key-length value value-imm? size)
1440 (incf new-right-used size)
1441 (decf new-right-free size)
1442 (update-free-and-used)
1443 (accessing-byte-array (right-buf right-buf-offset)
1444 (store.w (1+ (load.uw $btree_count)) $btree_count))
1445 (replace-parent-entry i)
1446 (debug-break "Done with insertion in right neighbor")
1447 ; #+bill (check-btree-consistency disk-cache btree)
1448 (return t)))
1449 (incf bytes-moved (aref sizes i)))))))))))))
1450
1451; Much like btree-find-leaf-node, but it makes sure there's room
1452; for an entry of max-key-size in every node on the way to the leaf.
1453; This is simpler than letting node splits "bubble up" and it also
1454; works better in a multi-processing environment (this code does
1455; not yet work in a multi-processing environment, but using this
1456; algorithm will allow multiple processes to access a single btree
1457; at the same time).
1458(defun %create-insertion-path (disk-cache btree key-string max-key-size)
1459 (let ((node (dc-%svref disk-cache btree $btree.root))
1460 (case-sensitive? (not (logbitp $btree-type_string-equal-bit
1461 (dc-%svref disk-cache btree $btree.type)))))
1462 (loop
1463 (accessing-disk-cache (disk-cache node)
1464 (let* ((offset (%btree-search-node
1465 disk-cache node key-string case-sensitive?))
1466 (flags (load.w $btree_flags))
1467 (leaf-p (logbitp $btree_flags.leaf-bit flags))
1468 (root-p (logbitp $btree_flags.root-bit flags)))
1469 (when leaf-p
1470 (when root-p
1471 (%make-new-root-node disk-cache btree node))
1472 (return t))
1473 (let ((free (load.uw $btree_free)))
1474 (if (>= free max-key-size)
1475 (setq node (read-long disk-cache (+ node offset)))
1476 (progn
1477 (when root-p
1478 (%make-new-root-node disk-cache btree node))
1479 (%split-node disk-cache btree node free (load.uw $btree_used) nil)
1480 (setq node (load.p $btree_parent)))))) ; may have moved to new right neighbor
1481 (require-satisfies dc-vector-subtype-p
1482 disk-cache node $v_btree-node)))))
1483
1484; node is the current root node. Creates a new root node
1485; with node as its only child.
1486(defun %make-new-root-node (disk-cache btree node)
1487 (let ((root (dc-cons-btree-node disk-cache btree btree (ash 1 $btree_flags.root-bit))))
1488 (accessing-disk-cache (disk-cache root)
1489 (store.p node $btree_data))
1490 (setf (dc-%svref disk-cache btree $btree.root) root)
1491 (setf (dc-%svref disk-cache btree $btree.depth t)
1492 (1+ (dc-%svref disk-cache btree $btree.depth)))
1493 (accessing-disk-cache (disk-cache node)
1494 (store.p root $btree_parent)
1495 (store.w (logandc1 (ash 1 $btree_flags.root-bit) (load.w $btree_flags))
1496 $btree_flags))))
1497
1498; This will only ever do anything if you have large keys
1499; Maybe it's not worth the effort, but it doesn't happen very often
1500; (unless there are large keys, and then it's worth it)
1501(defun %sew-up-insertion-path (disk-cache btree node leaf-p)
1502 (accessing-disk-cache (disk-cache node)
1503 (let* ((free (load.uw $btree_free))
1504 (used (load.uw $btree_used))
1505 (count (load.uw $btree_count))
1506 (unmerged-node (%btree-merge-with-neighbors
1507 disk-cache btree node free used count leaf-p)))
1508 (when unmerged-node
1509 (accessing-disk-cache (disk-cache unmerged-node)
1510 (%sew-up-insertion-path disk-cache btree (load.p $btree_parent) nil))))))
1511
1512;; Store a single entry into a buffer.
1513(defun %store-btree-entry (buf offset page string string-length value value-imm? &optional size)
1514 (declare (fixnum offset string-length))
1515 (let ((p offset))
1516 (declare (fixnum p))
1517 (accessing-byte-array (buf nil page)
1518 (store.p value p value-imm?)
1519 (store.b string-length (incf p 4))
1520 (store.string string (incf p 1) string-length)
1521 (incf p string-length)
1522 (let* ((bytes (+ 5 string-length))
1523 (filler (- (or size (setq size (normalize-size bytes 4)))
1524 bytes)))
1525 (declare (fixnum bytes filler))
1526 (when (> filler 0)
1527 ; This is for us poor humans.
1528 (fill.b p 0 filler)))))
1529 size)
1530
1531
1532;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1533;;
1534;; Code to support deletion
1535;;
1536
1537(defun %btree-delete-from-node (disk-cache btree node offset leaf-p)
1538 (declare (fixnum offset))
1539 (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
1540 (accessing-byte-array (node-buf node-buf-offset node-page)
1541 (let* ((size (normalize-size (+ 5 (load.b (+ offset 4))) 4))
1542 (free (load.uw $btree_free))
1543 (new-free (+ free size))
1544 (used (load.uw $btree_used))
1545 (new-used (- used size))
1546 (count (load.uw $btree_count))
1547 (new-count (1- count))
1548 (p (+ node-buf-offset offset))
1549 (bytes-to-copy (- used size (- offset $btree_data))))
1550 (declare (fixnum size free new-free used new-used count new-count p bytes-to-copy))
1551 (debug-break "About to delete from node")
1552 (%copy-byte-array-portion node-buf (+ p size) bytes-to-copy
1553 node-buf p node-page)
1554 (fill.b (+ $btree_data new-used) 0 size)
1555 (store.w new-free $btree_free)
1556 (store.w new-used $btree_used)
1557 (store.w new-count $btree_count)
1558 (debug-break "Deleted from node")
1559 (%btree-merge-with-neighbors
1560 disk-cache btree node new-free new-used new-count leaf-p)))))
1561
1562; Returns the last node on the way up the parent chain that could not be
1563; merged with either neighbor, or NIL if it got all the way to the root.
1564
1565(defun %btree-merge-with-neighbors (disk-cache btree node free used count leaf-p)
1566 (if (%btree-root-node-p disk-cache node)
1567 (when (eql 0 count)
1568 ; delete the root node, unless it is also the leaf node
1569 (unless (%btree-leaf-node-p disk-cache node)
1570 (accessing-disk-cache (disk-cache node)
1571 (unless (eql 4 (load.uw $btree_used))
1572 (error "Inconsistency: root should be empty, but isn't"))
1573 (let ((new-root (load.p $btree_data)))
1574 (debug-break "About to install new root")
1575 (setf (dc-%svref disk-cache btree $btree.root) new-root)
1576 (setf (dc-%svref disk-cache btree $btree.depth t)
1577 (1- (dc-%svref disk-cache btree $btree.depth)))
1578 (accessing-disk-cache (disk-cache new-root)
1579 (store.p btree $btree_parent)
1580 (store.w (logior (ash 1 $btree_flags.root-bit)
1581 (load.uw $btree_flags))
1582 $btree_flags))
1583 (debug-break "Installed new root")
1584 (dc-free-btree-node disk-cache btree node)
1585 nil))))
1586 (let ((parent (accessing-disk-cache (disk-cache node) (load.p $btree_parent))))
1587 (multiple-value-bind (node-offset left-offset) (%btree-search-for-subnode disk-cache parent node)
1588 (unless node-offset
1589 (error "Inconsistency: didn't find node: ~s in parent: ~s" node parent))
1590 (accessing-disk-cache (disk-cache parent)
1591 (when left-offset
1592 (let ((left-node (load.p left-offset))
1593 ; Initializes parent size to -4 if leaf-p, which is correct since
1594 ; when merging two nodes, the pointer from the left node to the
1595 ; right node is eliminated, making the data take 4 bytes less.
1596 (parent-size (- (if leaf-p 0 (- node-offset left-offset)) 4)))
1597 (declare (fixnum parent-size))
1598 (accessing-disk-cache (disk-cache left-node)
1599 (let ((left-free (load.uw $btree_free)))
1600 (declare (fixnum left-free))
1601 (when (>= left-free (+ used parent-size))
1602 (debug-break "About to merge with left neighbor")
1603 (return-from %btree-merge-with-neighbors
1604 (%btree-merge-nodes
1605 disk-cache btree left-node node parent left-offset node-offset leaf-p)))))))
1606 (let ((end-offset (+ $btree_data -4 (load.uw $btree_used))))
1607 (declare (fixnum end-offset))
1608 (when (< node-offset end-offset)
1609 (let* ((right-length (load.b (+ node-offset 4)))
1610 (right-offset (+ node-offset (normalize-size (+ 5 right-length) 4)))
1611 (right-node (load.p right-offset))
1612 (parent-size (- (if leaf-p 0 (- right-offset node-offset)) 4)))
1613 (declare (fixnum right-length right-offset parent-size))
1614 (accessing-disk-cache (disk-cache right-node)
1615 (let ((right-used (load.uw $btree_used)))
1616 (when (>= free (+ right-used parent-size))
1617 (debug-break "About to merge with right neighbor")
1618 (return-from %btree-merge-with-neighbors
1619 (%btree-merge-nodes
1620 disk-cache btree node right-node parent node-offset right-offset leaf-p))))))))
1621 node)))))
1622
1623; We know that there's room to merge the nodes. Do it.
1624; It's important that this code merges into the LEFT node as that ensures that the
1625; first leaf node remains constant (the btree points at it and p-map-btree relies on that fact).
1626; (You could instead update $btree.first-leaf as necessary).
1627(defun %btree-merge-nodes (disk-cache btree left-node right-node parent left-offset right-offset leaf-p)
1628 (declare (fixnum left-offset right-offset))
1629 (with-locked-page (disk-cache parent nil parent-buf parent-buf-offset)
1630 (with-locked-page (disk-cache left-node t left-buf left-buf-offset nil left-page)
1631 (with-locked-page (disk-cache right-node nil right-buf right-buf-offset)
1632 (accessing-byte-array (left-buf left-buf-offset)
1633 (let* ((p (+ left-buf-offset $btree_data (load.uw $btree_used)))
1634 (right-used (accessing-byte-array (right-buf right-buf-offset)
1635 (load.uw $btree_used)))
1636 (count-inc (accessing-byte-array (right-buf right-buf-offset)
1637 (load.uw $btree_count)))
1638 (used-inc right-used))
1639 (declare (fixnum p right-used count-inc used-inc))
1640 (if leaf-p
1641 (progn
1642 (decf p 4)
1643 (decf used-inc 4))
1644 (let ((size (- right-offset left-offset 4)))
1645 (declare (fixnum size))
1646 (debug-break "About to copy parent info")
1647 (%copy-byte-array-portion parent-buf (+ parent-buf-offset left-offset 4) size
1648 left-buf p left-page)
1649 (incf used-inc size)
1650 (incf count-inc)
1651 (incf p size)))
1652 (debug-break "About to copy right-buf info")
1653 (%copy-byte-array-portion right-buf (+ right-buf-offset $btree_data) right-used
1654 left-buf p left-page)
1655 (store.w (+ (load.uw $btree_used) used-inc) $btree_used)
1656 (store.w (- (load.uw $btree_free) used-inc) $btree_free)
1657 (store.w (+ (load.uw $btree_count) count-inc) $btree_count)
1658 (dc-free-btree-node disk-cache btree right-node)
1659 (accessing-byte-array (parent-buf parent-buf-offset)
1660 (store.p left-node right-offset))
1661 (unless leaf-p
1662 (%btree-update-childrens-parents disk-cache left-node p))
1663 (debug-break "Nodes merged")
1664 (%btree-delete-from-node disk-cache btree parent left-offset nil)))))))
1665
1666;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1667;;;
1668;;; Consistency checking and statistics gathering
1669;;;
1670
1671(defun check-btree (btree)
1672 (check-btree-consistency (pheap-disk-cache (pptr-pheap btree)) (pptr-pointer btree)))
1673
1674; Checks consistency and returns how full the btree is.
1675(defun check-btree-consistency (disk-cache btree &optional check-nodes-and-count?)
1676 (let ((root (accessing-disk-cache (disk-cache)
1677 (svref.p btree $btree.root))))
1678 (multiple-value-bind (free used nodes count leaf-free leaf-used leaf-nodes non-leaf-free non-leaf-used non-leaf-nodes)
1679 (check-btree-node-consistency disk-cache root btree)
1680 (when check-nodes-and-count?
1681 (let ((missing-nodes (- (accessing-disk-cache (disk-cache)
1682 (svref.p btree $btree.nodes))
1683 nodes)))
1684 (unless (eql missing-nodes 0)
1685 (cerror "Continue" "~d. missing nodes" missing-nodes)))
1686 (let ((missing-entries (- (accessing-disk-cache (disk-cache)
1687 (svref.p btree $btree.count))
1688 count)))
1689 (unless (eql 0 missing-entries)
1690 (cerror "Continue" "~d. missing entries" missing-entries))))
1691 (values (if (eql used 0) 0 (/ used (float (+ free used))))
1692 nodes
1693 count
1694 (if (eql leaf-used 0) 0 (/ leaf-used (float (+ leaf-free leaf-used))))
1695 leaf-nodes
1696 (if (eql non-leaf-used 0) 0 (/ non-leaf-used (float (+ non-leaf-free non-leaf-used))))
1697 non-leaf-nodes))))
1698
1699(defun check-btree-node-consistency (disk-cache node parent)
1700 (require-satisfies dc-vector-subtype-p disk-cache node $v_btree-node)
1701 (accessing-disk-cache (disk-cache node)
1702 (let* ((vector-size (%vector-size.p node))
1703 (free (load.uw $btree_free))
1704 (used (load.uw $btree_used))
1705 (count (load.uw $btree_count))
1706 (nodes 1)
1707 (sizes (make-array (the fixnum (1+ count))))
1708 (leaf? (%btree-leaf-node-p disk-cache node))
1709 (total-count (if leaf? count 0))
1710 (p $btree_data)
1711 (leaf-free 0)
1712 (leaf-used 0)
1713 (leaf-nodes 0)
1714 (non-leaf-free 0)
1715 (non-leaf-used 0)
1716 (non-leaf-nodes 0))
1717 (declare (fixnum free used count p leaf-free leaf-used leaf-nodes non-leaf-free non-leaf-used non-leaf-nodes)
1718 (dynamic-extent sizes))
1719 (if leaf?
1720 (setq leaf-free free leaf-used used leaf-nodes 1)
1721 (setq non-leaf-free free non-leaf-used used non-leaf-nodes 1))
1722 (unless (eql parent (load.l $btree_parent))
1723 (error "parent should be: #x~x, was: #x~x" parent (load.l $btree_parent)))
1724 (unless (eql (- vector-size (- $btree_data $v_data)) (+ free used))
1725 (cerror "Continue."
1726 "~&(+ free used) is wrong. Node: #x~x, free: #x~x, used: #x~x~%"
1727 node free used))
1728 (%lookup-node-sizes disk-cache node sizes count)
1729 (setf (aref sizes count) 0)
1730 (unless leaf?
1731 (dotimes (i (1+ count))
1732 (multiple-value-bind (c-free c-used c-nodes c-count
1733 c-leaf-free c-leaf-used c-leaf-nodes
1734 c-non-leaf-free c-non-leaf-used c-non-leaf-nodes)
1735 (check-btree-node-consistency disk-cache (load.l p) node)
1736 (incf free c-free)
1737 (incf used c-used)
1738 (incf leaf-free c-leaf-free)
1739 (incf leaf-used c-leaf-used)
1740 (incf leaf-nodes c-leaf-nodes)
1741 (incf non-leaf-free c-non-leaf-free)
1742 (incf non-leaf-used c-non-leaf-used)
1743 (incf nodes c-nodes)
1744 (incf non-leaf-nodes c-non-leaf-nodes)
1745 (incf total-count c-count))
1746 (incf p (aref sizes i))))
1747 (values free used nodes total-count leaf-free leaf-used leaf-nodes non-leaf-free non-leaf-used non-leaf-nodes))))
1748
1749
1750#|
1751; Test code. Stores symbols in a btree.
1752(defun init-temp-btree ()
1753 (declare (special pheap dc b))
1754 (when (boundp 'pheap)
1755 (close-pheap pheap))
1756 (delete-file "temp.pheap")
1757 (create-pheap "temp.pheap")
1758 (setq pheap (open-pheap "temp.pheap")
1759 dc (pheap-disk-cache pheap))
1760 (dolist (w (windows :class 'inspector::inspector-window))
1761 (window-close w))
1762 (setq b (dc-make-btree dc))
1763 #+ignore
1764 (let ((w (inspect dc)))
1765 (set-view-size w #@(413 384))
1766 (scroll-to-address (inspector::inspector-view w) (dc-%svref dc b $btree.root))))
1767
1768(defvar *symbols* nil)
1769(defvar *value-offset* 0)
1770
1771(defun *symbols* ()
1772 (let ((syms *symbols*))
1773 (unless syms
1774 (let ((hash (make-hash-table :test 'equal)))
1775 (do-symbols (s)
1776 (unless (gethash (symbol-name s) hash)
1777 (setf (gethash (symbol-name s) hash) t)
1778 (push s syms))))
1779 (setq *symbols* syms
1780 *value-offset* 0))
1781 syms))
1782
1783(defun store-symbols (&optional (step-sym 0) check? (check-sym 0))
1784 (declare (special dc b))
1785 (let ((syms (*symbols*))
1786 (check-check-sym? nil)
1787 (i 0))
1788 (dolist (s syms)
1789 (let ((string (symbol-name s))
1790 (value (+ i *value-offset*)))
1791 (if (eq s step-sym)
1792 (step
1793 (dc-btree-store dc b string (require-type value 'fixnum) t))
1794 (dc-btree-store dc b string (require-type value 'fixnum) t))
1795 (when (eql s check-sym) (setq check-check-sym? i))
1796 (incf i)
1797 (when check-check-sym?
1798 (unless (eql check-check-sym? (dc-btree-lookup dc b (symbol-name check-sym)))
1799 (cerror "Continue" "Can't find ~s" check-sym)))
1800 (when (and check? (or (not (fixnump check?))
1801 (eql 0 (mod i check?))))
1802 (format t "~&Checking ~d..." i)
1803 (check-symbols s)
1804 (terpri))))
1805 i))
1806
1807(defun check-symbols (&optional (upto-and-including 0))
1808 (declare (special dc b))
1809 (let ((i 0))
1810 (dolist (s (*symbols*))
1811 (let ((was (dc-btree-lookup dc b (symbol-name s)))
1812 (value (+ i *value-offset*)))
1813 (unless (eql was value)
1814 (cerror "Continue"
1815 "Sym: ~s, was: ~s, sb: ~s" s was value))
1816 (incf i)
1817 (when (eq s upto-and-including)
1818 (return))))
1819 i))
1820
1821(defun delete-symbols (&optional (count nil) (check-period nil))
1822 (declare (special dc b))
1823 (let ((check-count (or check-period most-positive-fixnum)))
1824 (dotimes (i (or count (length *symbols*)))
1825 (when (null *symbols*) (return))
1826 (incf *value-offset*)
1827 (dc-btree-delete dc b (symbol-name (pop *symbols*)))
1828 (when (<= (decf check-count) 0)
1829 (setq check-count check-period)
1830 (format t "~&Checking ~d..." i)
1831 (check-symbols)
1832 (terpri)))))
1833
1834(defun sort-syms-upto (sym)
1835 (let ((first-n (let ((res nil))
1836 (dolist (s *symbols* (error "Not found"))
1837 (push s res)
1838 (when (eq s sym) (return res))))))
1839 (sort first-n #'string<)))
1840
1841(defun btree-test (&optional (step-sym 0))
1842 (init-temp-btree)
1843 (store-symbols step-sym))
1844
1845(defun clear-disk-cache ()
1846 (unwind-protect
1847 (with-open-file (s "temp.temp" :direction :output :if-exists :overwrite)
1848 (file-length s (* 256 1024))
1849 (dotimes (i 512)
1850 (file-position s (* i 512))
1851 (tyo #\f s)))
1852 (delete-file "temp.temp")))
1853
1854(defun time-btree-store (&optional (swap-space-in-k 20))
1855 (let* ((syms (*symbols*))
1856 (syms-count (length syms))
1857 (index 0))
1858 (declare (fixnum index))
1859 (clear-disk-cache)
1860 (gc)
1861 (let ((time (get-internal-real-time)))
1862 (with-open-pheap (p "temp.pheap"
1863 :if-exists :supersede
1864 :if-does-not-exist :create
1865 :swapping-space (* swap-space-in-k 1024)
1866 :page-size 512)
1867 (let ((b (p-make-btree p)))
1868 (setf (root-object p) b)
1869 (dolist (s syms)
1870 (setf (p-btree-lookup b (symbol-name s)) (incf index))))
1871 (let ((total-time (/ (float (- (get-internal-real-time) time))
1872 internal-time-units-per-second))
1873 (file-length (with-open-file (s "temp.pheap") (file-length s))))
1874 (format t "~&Total time: ~d~%Elements: ~d~%time/element: ~d~%file length: ~d"
1875 total-time
1876 syms-count
1877 (/ total-time syms-count)
1878 file-length))))))
1879
1880(defun time-btree-read (&optional (swap-space-in-k 20))
1881 (let* ((syms (*symbols*))
1882 (syms-count (length syms)))
1883 (clear-disk-cache)
1884 (gc)
1885 (let ((time (get-internal-real-time)))
1886 (with-open-pheap (p "temp.pheap"
1887 :swapping-space (* swap-space-in-k 1024))
1888 (let ((b (root-object p)))
1889 (dolist (s syms)
1890 (p-btree-lookup b (symbol-name s)))))
1891 (let ((total-time (/ (float (- (get-internal-real-time) time))
1892 internal-time-units-per-second))
1893 (file-length (with-open-file (s "temp.pheap") (file-length s))))
1894 (format t "~&Total time: ~d~%Elements: ~d~%time/element: ~d~%file length: ~d"
1895 total-time
1896 syms-count
1897 (/ total-time syms-count)
1898 file-length)))))
1899
1900
1901|#
1902
1903#|
1904; Code to trace functions that were hard to debug.
1905(advise %btree-insert-in-node
1906 (destructuring-bind (dc b node offset key-string value &optional value-imm? (key-length (length key-string))) arglist
1907 (declare (ignore offset value value-imm?))
1908 (if (or (%btree-leaf-node-p dc node)
1909 (<= (normalize-size (+ 5 key-length))
1910 (accessing-disk-cache (dc node) (load.uw $btree_free))))
1911 (:do-it)
1912 (step (:do-it))))
1913 :when :around)
1914
1915(advise %balance-inner-node-after-deletion
1916 (step (:do-it))
1917 :when :around)
1918|#
1919;;; 1 3/10/94 bill 1.8d247
1920;;; 2 7/26/94 Derek 1.9d027
1921;;; 3 10/04/94 bill 1.9d071
1922;;; 4 11/01/94 Derek 1.9d085 Bill's Saving Library Task
1923;;; 5 11/03/94 Moon 1.9d086
1924;;; 2 2/18/95 RŽti 1.10d019
1925;;; 3 3/23/95 bill 1.11d010
1926;;; 4 4/19/95 bill 1.11d021
1927;;; 5 6/02/95 bill 1.11d040
Note: See TracBrowser for help on using the repository browser.