source: trunk/btrees.lisp @ 3

Revision 3, 94.3 KB checked in by gz, 9 years ago (diff)

Recovered version 0.961 from Sheldon Ball <s.ball@…>

  • Property svn:eol-style set to native
Line 
1;;;-*- Mode: Lisp; Package: WOOD -*-
2
3;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4;;;
5;;; btrees.lisp
6;;; B* trees with variable length keys for pheaps.
7;;;
8;;; Copyright © 1996-1997 Digitool, Inc.
9;;; Copyright © 1992-1995 Apple Computer, Inc.
10;;; All rights reserved.
11;;; Permission is given to use, copy, and modify this software provided
12;;; that Digitool is given credit in all derivative works.
13;;; This software is provided "as is". Digitool makes no warranty or
14;;; representation, either express or implied, with respect to this software,
15;;; its quality, accuracy, merchantability, or fitness for a particular
16;;; purpose.
17;;;
18
19;;; Key size is limited to 127 bytes with longer keys
20;;; being stored as strings (and requiring an extra disk access).
21;;; (longer strings are not yet implemented).
22
23;;;;;;;;;;;;;;;;;;;;;;;;;;
24;;;
25;;; Modification History
26;;;
27;;; 03/26/97 bill  check-btree-consistency avoids divide by zero errors.
28;;; -------------  0.96
29;;; 08/27/96 bill  Wrap an eval-when around the definition of *enable-debug-break* as
30;;;                suggested by Rainer Joswig.
31;;; -------------  0.95
32;;; 05/13/96 bill  mis-parenthesization of compare-string in %btree-search-node
33;;; -------------  0.94 = MCL-PPC 3.9
34;;; 03/26/96 bill  Don't use %schar in %btree-search-node, MCL-PPC type checks it.
35;;; 03/21/96 bill  Specify element-type of base-character to make-string calls
36;;;                Add an optional swap-space-in-k arg to time-btree-store & time-btree-read
37;;; -------------  0.93
38;;; 08/10/95 bill  p-do-btree macro is to a btree what dolist is to a list.
39;;; 07/02/95 bill  Fix fencepost in computation of fill-count in %shift-node-left
40;;;                Thanks to Horace Enea for providing the test case to track down this bug.
41;;; 05/25/95 bill  New parameter: *max-btree-node-size*.
42;;;                dc-cons-btree-node calls dc-allocate-new-btree-node instead of
43;;;                %dc-allocate-new-memory.
44;;;                New function: dc-allocate-new-btree-node, calls %dc-allocate-new-memory
45;;;                as before if the page size is <= *max-btree-node-size*. Otherwise, carves
46;;;                up a disk page into *max-btree-node-size* nodes, returns one of them, and
47;;;                adds the rest to the btree node free list.
48;;; 04/11/95 bill  new function: p-map-btree-keystrings.
49;;;                dc-map-btree uses compare-strings-function instead of string<
50;;;                for comparing with the TO arg.
51;;; -------------  0.9
52;;; 02/06/95 bill  Moon's idea to add binary search to %btree-search-node without
53;;;                changing the disk representation. That and doing the comparisons
54;;;                inline got speed improvement of a factor of 2.7 over the old linear
55;;;                search code.
56;;; 12/16/94 bill  Complete rewrite of the low-level insertion code.
57;;;                This fixes a bug found by Paul.Meurer@mi.uib.no and
58;;;                will make much easier the addition of binary search for nodes.
59;;; 10/28/94 Moon  Change without-interrupts to with-databases-locked.
60;;; 10/25/94 bill  (setf (p-btree-store ...) x) now returns x instead of its pptr.
61;;; 09/21/94 bill  without-interrupts as necessary for interlocking
62;;; -------------  0.8
63;;; 07/31/93 bill  %btree-insert-in-inner-node now goes out of line to
64;;;                %attempt-to-shift-left-inner-node & %attempt-to-shift-right-inner-node.
65;;;                %attempt-to-shift-right-inner-node is new code. I thought
66;;;                that I could get away with leaving it out, but I was wrong.
67;;; 03/29/93 bill  *forwarded-btree* in btree-find-leaf-node & dc-map-btree
68;;; 03/25/93 bill  %btree-split-inner-node - wrong page on one of the accessing-byte-array's
69;;;                Also, neglected to update last-middle-size when the parent entry went at the
70;;;                end of the new middle node. In this case, some of the copying was also extraneous.
71;;; -------------- 0.6
72;;; -------------- 0.5
73;;; 07/28/92 bill  make p-map-btree deal correctly with insertion or
74;;;                deletion while mapping.
75;;; 07/27/92 bill  p-clear-btree, p-map-btree
76;;; 06/30/92 bill  little bug in %split-btree-root
77;;; 06/26/92 bill  btree vector indices defs -> woodequ
78;;; 06/23/92 bill  Don't ignore type in p-make-btree
79;;; -------------- 0.1
80;;;
81
82;;;;;;;;;;;;;;;;;;;;;;;;;;
83;;;
84;;; To do:
85;;;
86;;; 1) Maybe.
87;;;    Replace the $btree_parent slot with $btree_mod-count for locking use.
88;;;    Updating parents at shift or split time is too expensive.
89;;;    Instead, pass around an ancestors list (stack-consed).
90;;;
91;;; 2) Implement keys longer than 127 bytes.
92
93(in-package :wood)
94
95(export '(p-make-btree p-make-string-equal-btree
96          p-btree-lookup p-btree-store p-btree-delete
97          dc-make-btree dc-btree-lookup dc-btree-store dc-btree-delete))
98
99;;;;;;;;;;;;;;;;;;;;;;;;;;
100;;;
101;;; btree vector - subtype $v_btree
102;;;
103;;; This code belongs in woodequ.lisp
104;;;
105
106; So this will work as a patch file
107(eval-when (:compile-toplevel :execute :load-toplevel)
108
109(unless (boundp '$btree.max-key-size)
110
111(makunbound '$btree-size)
112
113; Defined here so that this file can be distributed as a patch
114; Real definition is in "woodequ.lisp"
115(def-indices
116  $btree.root                           ; the root node
117  $btree.count                          ; number of leaf entries
118  $btree.depth                          ; 0 means only the root exists
119  $btree.nodes                          ; total number of nodes
120  $btree.first-leaf                     ; first leaf node. A constant
121  $btree.type                           ; type bits
122  $btree.max-key-size                   ; maximum size of a key
123  $btree-size                           ; length of a $v_btree vector
124  )
125
126)) ; End unless & compile-when
127
128; This file is being distributed before the with-databases-locked macro
129; is defined in "block-io-mcl.lisp"
130(eval-when (:compile-toplevel :execute :load-toplevel)
131
132(unless (fboundp 'with-databases-locked)
133  (defmacro with-databases-locked (&body body)
134    `(progn ,@body)))
135
136)
137
138;;;;;;;;;;;;;;;;;;;;;;;;;;
139;;;
140;;; Node layout - subtype $v_btree-node
141;;;
142;;;  -------------------
143;;; | $vector-header    |
144;;; | subtype length    |
145;;; |-------------------|
146;;; | link              |
147;;; | parent            |
148;;; | used free         |
149;;; | count flags       |
150;;; | pointer[0]        |
151;;; | len[0] key[0] ... |
152;;; | pointer[1]        |
153;;; | len[1] key[1] ... |
154;;; |        ...        |
155;;; | pointer[m]        |
156;;; | len[m] key[m] ... |
157;;; | pointer[m+1]      |
158;;;  -------------------
159
160;;; $vector-header is the standard vector header marker
161;;; subtype is one byte, it's value is $v_btree-node
162;;; length is the total length of the data portion of the block in bytes
163;;; link is used by the GC so that it can walk btree nodes last.
164;;; parent points at the parent node of this one, or at the btree
165;;;   uvector for the root.
166;;; used is 16 bits: the number of bytes that are in use at $btree_data
167;;; free is 16 bits: the number of free bytes at the end of the block.
168;;; count is 16 bits: the number of entries in this node
169;;; flags is 16 bits of flags.
170;;;   Bit 0 is set for a leaf page.
171;;;   Bit 1 is set for the root page.
172;;; pointer[i] is 4 bytes aligned on a 4-byte boundary.
173;;;   For a leaf node, it points at the indexed data.
174;;;   For a non-leaf node, it points at another node in the tree
175;;;     This node is the branch of the tree containing all entries whose
176;;;     keys are <= key[i].
177;;;   pointer[m+1] for a leaf node points to the next leaf node.
178;;;     This makes a linked list of all the leaf nodes,
179;;;     which is useful for mapping over the entries.
180;;;   pointer[m+1] for a non-leaf node points at the branch of the tree
181;;;     containing entries whose keys are > key[m]
182;;; len[i] is a byte giving the length of key[i]
183;;;   if len[i] is 255, then there are three unused bytes followed
184;;;   by a four byte pointer to a string containing the key.
185;;;   otherwise, len[i] will always be < 128
186;;;   (keys longer than 127 bytes are not yet implemented)
187;;; key[i] is len[i] bytes of characters for the key followed
188;;;   by enough padding bytes to get to the next 4-byte boundary.
189
190(defconstant $btree_link $v_data)
191(defconstant $btree_parent (+ $btree_link 4))
192(defconstant $btree_used (+ $btree_parent 4))
193(defconstant $btree_free (+ $btree_used 2))
194(defconstant $btree_count (+ $btree_free 2))
195(defconstant $btree_flags (+ $btree_count 2))
196(defconstant $btree_data (+ $btree_flags 2))
197
198(defconstant $btree_flags.leaf-bit 0)
199(defconstant $btree_flags.root-bit 1)
200
201(defmacro with-simple-string ((var string) &body body)
202  (let ((thunk (gensym)))
203    `(let ((,thunk #'(lambda (,var) ,@body)))
204       (declare (dynamic-extent ,thunk))
205       (funcall-with-simple-string ,string ,thunk))))
206
207(defun funcall-with-simple-string (string thunk)
208  (if (simple-string-p string)
209    (funcall thunk string)
210    (let* ((len (length string))
211           (simple-string (make-string len :element-type 'base-character)))
212      (declare (dynamic-extent simple-string))
213      (multiple-value-bind (str offset) (ccl::array-data-and-offset string)
214        (wood::%load-string str offset len simple-string))
215      (funcall thunk simple-string))))
216
217; Bound by the garbage collector. We won't type-check this
218; btree as it's subtype has been overwritten with a forwarding pointer.
219(defvar *forwarded-btree* nil)
220
221;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
222;;;
223;;; The documented interface
224;;;
225
226(defun p-make-btree (pheap &key area type)
227  (pptr pheap
228        (dc-make-btree (pheap-disk-cache pheap)
229                       (and area (pheap-pptr-pointer area pheap))
230                       (and type (require-type type 'fixnum)))))
231
232(defun p-make-string-equal-btree (pheap &key area)
233  (p-make-btree pheap :area area :type $btree-type_string-equal))
234
235(defun p-btree-lookup (btree key-string &optional default)
236  (let ((pheap (pptr-pheap btree)))
237    (multiple-value-bind (pointer immediate? found?)
238                         (dc-btree-lookup
239                          (pheap-disk-cache pheap)
240                          (pptr-pointer btree)
241                          (if (stringp key-string)
242                            key-string
243                            (p-load key-string)))
244      (if found?
245        (values
246         (if immediate?
247           pointer
248           (pptr pheap pointer))
249         t)
250        default))))
251
252(defun p-btree-store (btree key-string default &optional (value default))
253  (let ((pheap (pptr-pheap btree)))
254    (multiple-value-bind (pointer immediate?)
255                         (%p-store pheap value)
256    (dc-btree-store
257     (pheap-disk-cache pheap)
258     (pptr-pointer btree)
259     (if (stringp key-string)
260       key-string
261       (p-load key-string))
262     pointer
263     immediate?)
264    (if immediate?
265      pointer
266      (pptr pheap pointer)))))
267
268(defun setf-p-btree-lookup (btree key-string default &optional (value default))
269  (p-btree-store btree key-string default value)
270  value)
271
272(defsetf p-btree-lookup setf-p-btree-lookup)
273
274(defun p-btree-delete (btree key-string)
275  (dc-btree-delete
276   (pptr-disk-cache btree)
277   (pptr-pointer btree)
278   (if (stringp key-string)
279     key-string
280     (p-load key-string))))
281
282(defun p-clear-btree (btree)
283  (dc-clear-btree (pptr-disk-cache btree)
284                  (pptr-pointer btree))
285  btree)
286
287(defmacro p-do-btree ((key value) (btree &optional from to) &body body)
288  (let ((mapper (gensym)))
289    `(let ((,mapper #'(lambda (,key ,value) ,@body)))
290       (declare (dynamic-extent ,mapper))
291       ,(if (or from to)
292          `(p-map-btree ,btree ,mapper ,from ,to)
293          `(p-map-btree ,btree ,mapper)))))
294
295(defun p-map-btree (btree function &optional from to)
296  (let* ((pheap (pptr-pheap btree))
297         (f #'(lambda (disk-cache key value imm?)
298                (declare (ignore disk-cache))
299                (funcall function key (if imm? value (pptr pheap value))))))
300    (declare (dynamic-extent f))
301    (dc-map-btree (pheap-disk-cache pheap)
302                  (pptr-pointer btree)
303                  f
304                  (if (or (null from) (stringp from))
305                    from
306                    (p-load from))
307                  (if (or (null to) (stringp to))
308                    to
309                    (p-load to)))))
310
311(defun p-map-btree-keystrings (btree function &optional from to)
312  (let* ((pheap (pptr-pheap btree))
313         (f #'(lambda (disk-cache key value imm?)
314                (declare (ignore disk-cache value imm?))
315                (funcall function key))))
316    (declare (dynamic-extent f))
317    (dc-map-btree (pheap-disk-cache pheap)
318                  (pptr-pointer btree)
319                  f
320                  (if (or (null from) (stringp from))
321                    from
322                    (p-load from))
323                  (if (or (null to) (stringp to))
324                    to
325                    (p-load to)))))
326
327;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
328;;;
329;;; disk-cache versions of the documented interface
330;;;
331
332(defun dc-make-btree (disk-cache &optional area type)
333  (with-databases-locked
334   (let* ((btree (dc-make-uvector disk-cache $btree-size $v_btree area 0 t))
335          (root (dc-cons-btree-node
336                 disk-cache btree btree
337                 (logior (ash 1 $btree_flags.leaf-bit) (ash 1 $btree_flags.root-bit)))))
338     (accessing-disk-cache (disk-cache)
339       (svset.p btree $btree.root root)
340       (svset.p btree $btree.first-leaf root)
341       (when type
342         (svset.p btree $btree.type (require-type type 'fixnum) t))
343       (svset.p btree $btree.max-key-size 0))
344     btree)))
345
346(defun dc-btree-lookup (disk-cache btree key-string)
347  (with-databases-locked
348   (with-simple-string (key-string key-string)
349     (multiple-value-bind (node offset eq)
350                          (btree-find-leaf-node disk-cache btree key-string)
351       (when eq
352         (multiple-value-bind (pointer immediate?)
353                              (read-pointer disk-cache (+ node offset))
354           (values pointer immediate? t)))))))
355
356(defun dc-btree-store (disk-cache btree key-string value &optional
357                                       value-imm?)
358  (with-databases-locked
359   (with-simple-string (key-string key-string)
360     (if (> (length key-string) 127)
361       (error "Keys longer than 127 bytes not supported yet."))
362     (multiple-value-bind (node offset eq)
363                          (btree-find-leaf-node disk-cache btree key-string)
364       (if eq
365         (setf (read-pointer disk-cache (+ node offset) value-imm?)
366               value)
367         (progn
368           (%btree-insert-in-node
369            disk-cache btree node offset key-string value value-imm?)
370           (accessing-disk-cache (disk-cache)
371             (svset.p btree $btree.count (1+ (svref.p btree $btree.count)) t))
372           (values value value-imm?)))))))
373
374(defun dc-btree-delete (disk-cache btree key-string)
375  (with-databases-locked
376   (with-simple-string (key-string key-string)
377     (if (> (length key-string) 127)
378       (error "Keys longer than 127 bytes not supported yet."))
379     (multiple-value-bind (node offset eq)
380                          (btree-find-leaf-node disk-cache btree key-string)
381       (when eq
382         (%btree-delete-from-node disk-cache btree node offset t)
383         (accessing-disk-cache (disk-cache)
384           (svset.p btree $btree.count (1- (svref.p btree $btree.count)) t))
385         t)))))
386
387(defun dc-clear-btree (disk-cache btree)
388  (require-satisfies dc-vector-subtype-p disk-cache btree $v_btree)
389  (let (root-node first-leaf)
390    (with-databases-locked
391     (setq root-node (dc-%svref disk-cache btree $btree.root)
392           first-leaf (dc-%svref disk-cache btree $btree.first-leaf))
393     (accessing-disk-cache (disk-cache first-leaf)
394       (multiple-value-bind (node used free) (init-btree-node disk-cache first-leaf)
395         (declare (ignore node))
396         (fill.b (+ $btree_data used) 0 free))
397       (store.l btree $btree_parent)
398       (store.w (logior (ash 1 $btree_flags.root-bit)
399                        (ash 1 $btree_flags.leaf-bit)
400                        (load.w $btree_flags))
401             $btree_flags))
402     (dc-%svfill disk-cache btree
403       $btree.root first-leaf
404       ($btree.count t) 0
405       ($btree.count t) 0
406       ($btree.nodes t) 1)
407     (when (> (dc-uvsize disk-cache btree) $btree.max-key-size)         ; backward compatibility
408       (setf (dc-%svref disk-cache btree $btree.max-key-size t) 0)))
409    ; Eventually, rewrite dc-%clear-node to occasionally allow interrupts.
410    (with-databases-locked
411     (dc-%clear-node disk-cache root-node first-leaf)))
412  btree)
413
414(defun dc-%clear-node (disk-cache node first-leaf)
415  (require-satisfies dc-vector-subtype-p disk-cache node $v_btree-node)
416  (unless (eql node first-leaf)
417    (with-locked-page (disk-cache node nil buf offset)
418      (accessing-byte-array (buf offset)
419        (unless (logbitp $btree_flags.leaf-bit (load.w $btree_flags))
420          (let ((p $btree_data))
421            (declare (fixnum p))
422            (dotimes (i (load.w $btree_count))
423              (dc-%clear-node disk-cache (load.l p) first-leaf)
424              (incf p 4)
425              (incf p (normalize-size (1+ (load.b p)) 4)))
426            (dc-%clear-node disk-cache (load.l p) first-leaf)))
427        (dc-free-btree-node disk-cache nil node)))))
428
429; This assumes that actions at event-processing level don't change the
430; B-tree (i.e. the may read the database but not write it), so we can
431; release the database lock around the call to the user function.
432; Otherwise the database would have to be locked across the entire operation.
433; with-databases-locked is required around btree-find-leaf-node, because that
434; function doesn't call with-databases-locked itself, and also because the
435; with-locked-page below has a timing hazard (buf could be reused before
436; the page has been locked) unless it is called inside with-databases-locked.
437(defun dc-map-btree (disk-cache btree function &optional from to)
438  (if (null from)
439    (unless (eql btree *forwarded-btree*)
440      (require-satisfies dc-vector-subtype-p disk-cache btree $v_btree))
441    (unless (stringp from)
442      (setq from (require-type from '(or null string)))))
443  (unless (or (null to) (stringp to))
444    (setq to (require-type to '(or null string))))
445  (with-databases-locked
446   (multiple-value-bind (node p)
447                        (if from
448                          (btree-find-leaf-node disk-cache btree from)
449                          (values (dc-%svref disk-cache btree $btree.first-leaf)
450                                  $btree_data))
451     (declare (fixnum p))
452     (let ((string-equal-function (string-equal-function disk-cache btree))
453           (compare-strings-function (and to (compare-strings-function disk-cache btree))))
454       (loop
455         (block once-per-node
456           (with-locked-page (disk-cache node nil buf buf-offset)
457             (accessing-byte-array (buf buf-offset)
458               (let ((max-p (+ $btree_data (load.w $btree_used) -4)))
459                 (declare (fixnum max-p))
460                 (loop
461                   (when (>= p max-p)
462                     (when (> p max-p)
463                       (error "Inconsistency: pointer off end of btree node"))
464                     (return))
465                   (multiple-value-bind (value imm?) (load.p p)
466                     (let* ((len (load.b (incf p 4)))
467                            (key (make-string len :element-type 'base-character)))
468                       (declare (fixnum len)
469                                (dynamic-extent key))
470                       (load.string (the fixnum (1+ p)) len key)
471                       (when (and to (< (the fixnum (funcall compare-strings-function to key)) 0))
472                         (return-from dc-map-btree nil))
473                       (with-databases-unlocked
474                         (funcall function disk-cache key value imm?))
475                       (let ((newlen (load.b p)))
476                         (declare (fixnum newlen))
477                         (unless (and (eql newlen len)
478                                      (let ((new-key (make-string newlen :element-type 'base-character)))
479                                        (declare (dynamic-extent new-key))
480                                        (load.string (the fixnum (1+ p)) newlen new-key)
481                                        (funcall string-equal-function key new-key)))
482                           ; The user inserted or deleted something that caused
483                           ; the key to move. Need to find it again.
484                           (let (eq)
485                             (multiple-value-setq (node p eq)
486                               (btree-find-leaf-node disk-cache btree key))
487                             (when eq
488                               (incf p (normalize-size (1+ len) 4)))
489                             (return-from once-per-node))))
490                       (incf p (normalize-size (1+ len) 4))))))
491               (setq node (load.l p)
492                     p $btree_data)
493               (when (eql node $pheap-nil)
494                 (return nil))))))))))
495
496;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
497;;;
498;;; Grungy internal details
499;;; First, some generally useful utility functions
500;;;
501
502(defparameter *max-btree-node-size* 512)
503
504(defun dc-cons-btree-node (disk-cache btree parent flags)
505  (let ((node (or (with-locked-page (disk-cache (+ $root-vector $v_data
506                                                   (* 4 $pheap.btree-free-list)))
507                    (accessing-disk-cache (disk-cache)
508                      (let ((free-list (svref.p $root-vector $pheap.btree-free-list)))
509                        (unless (eql $pheap-nil free-list)
510                          (svset.p $root-vector $pheap.btree-free-list
511                                   (accessing-disk-cache (disk-cache free-list)
512                                     (load.l $btree_parent)))
513                          free-list))))
514                  (dc-allocate-new-btree-node disk-cache))))
515    (accessing-disk-cache (disk-cache node)
516      (store.l parent $btree_parent)
517      (store.w flags $btree_flags))
518    (with-locked-page (disk-cache (+ btree $v_data (* 4 $btree.nodes)) t)
519      (accessing-disk-cache (disk-cache)
520        (svset.p btree $btree.nodes (1+ (svref.p btree $btree.nodes)) t)))
521    node))
522
523; This takes care of chopping up a disk block into btree nodes.
524; This is necessary if the page size is bigger than the *max-btree-node-size*.
525; Otherwise, we just use one disk page.
526(defun dc-allocate-new-btree-node (disk-cache)
527  (let ((node (if (>= *max-btree-node-size* (disk-cache-page-size disk-cache))
528                (%dc-allocate-new-memory disk-cache 1 $v_btree-node 0 t)
529                ; Could do most of this size computation at pheap open time and cache the
530                ; results, but it's in the noise compared to everything else.
531                (let* ((segment (%dc-allocate-new-memory disk-cache 1 $v_segment nil))
532                       (segment-size (dc-%vector-size disk-cache segment))
533                       (segment-header-bytes (* 4 $segment-header-size))
534                       (bytes (- segment-size segment-header-bytes))
535                       (bytes-per-node *max-btree-node-size*)
536                       (count (ceiling bytes bytes-per-node))
537                       (size-per-node (- bytes-per-node $vector-header-size))
538                       (first-node-bytes (- bytes (* (1- count) bytes-per-node)))
539                       (first-node-size (- first-node-bytes $vector-header-size))
540                       (ptr (+ segment $v_data segment-header-bytes))
541                       res)
542                  (dc-%svfill disk-cache segment
543                    $segment.area $pheap-nil
544                    $segment.header $pheap-nil)
545                  (dotimes (i count)
546                    (if (eql i 0)
547                      (progn
548                        (setq res (initialize-vector-storage
549                                   disk-cache ptr first-node-size $v_btree-node 1 0 t))
550                        (incf ptr first-node-bytes))
551                      (progn
552                        (dc-free-btree-node
553                         disk-cache nil
554                         (initialize-vector-storage
555                          disk-cache ptr size-per-node $v_btree-node 1 nil))
556                        (incf ptr bytes-per-node))))
557                  (unless (eql (+ segment $v_data segment-size) ptr)
558                    (error "ptr not at segment end"))
559                  res))))
560      (init-btree-node disk-cache node)))
561
562(defun dc-free-btree-node (disk-cache btree node)
563  (multiple-value-bind (node used free) (init-btree-node disk-cache node)
564    (accessing-disk-cache (disk-cache node)
565      (fill.b (+ $btree_data used) 0 free)))
566  (with-locked-page (disk-cache $root-vector t)
567    (accessing-disk-cache (disk-cache)
568      (let ((free-list (svref.p $root-vector $pheap.btree-free-list)))
569        (accessing-disk-cache (disk-cache node)
570          (store.l free-list $btree_parent)))
571      (svset.p $root-vector $pheap.btree-free-list node)))
572  (when btree
573    (with-locked-page (disk-cache (+ btree $v_data (* 4 $btree.nodes)) t)
574      (accessing-disk-cache (disk-cache)
575        (svset.p btree $btree.nodes (1- (svref.p btree $btree.nodes)) t)))))
576
577(defun init-btree-node (disk-cache node)
578  (accessing-disk-cache (disk-cache node)
579    (let* ((vector-size (%vector-size.p node))
580           (data-size (- vector-size (- $btree_data $v_data)))
581           (used 4)
582           (free (- data-size used)))
583      (store.l $pheap-nil $btree_link)
584      (store.w used $btree_used)
585      (store.w free $btree_free)
586      (store.w 0 $btree_count)
587      (store.l $pheap-nil $btree_data)
588      (values node used free))))
589
590(defun %btree-leaf-node-p (disk-cache node)
591  (accessing-disk-cache (disk-cache node)
592    (logbitp $btree_flags.leaf-bit (load.w $btree_flags))))
593
594(defun %btree-root-node-p (disk-cache node)
595  (accessing-disk-cache (disk-cache node)
596    (logbitp $btree_flags.root-bit (load.w $btree_flags))))
597
598(defun compare-strings-function (disk-cache btree)
599  (if (logbitp $btree-type_string-equal-bit
600               (dc-%svref disk-cache btree $btree.type))
601    #'compare-strings-case-insensitive
602    #'compare-strings))
603
604; New function
605(defun compare-strings-case-insensitive (str1 str2)
606  (cond ((string-lessp str1 str2) -1)
607        ((string-equal str1 str2) 0)
608        (t 1)))
609
610; New function
611(defun string-equal-function (disk-cache btree)
612  (if (logbitp $btree-type_string-equal-bit
613               (dc-%svref disk-cache btree $btree.type))
614    #'string-equal
615    #'string=))
616
617; Returns two values:
618; 1) offset - from node for the place where key-string goes
619; 2) eq     - True if the key at this offset is key-string
620(defun btree-find-leaf-node (disk-cache btree key-string)
621  (unless (eql btree *forwarded-btree*)
622    (require-satisfies dc-vector-subtype-p disk-cache btree $v_btree))
623  (let ((node (dc-%svref disk-cache btree $btree.root))
624        (case-sensitive? (not (logbitp $btree-type_string-equal-bit
625                                       (dc-%svref disk-cache btree $btree.type)))))
626    (loop
627      (multiple-value-bind (offset eq)
628                           (%btree-search-node
629                            disk-cache node key-string case-sensitive?)
630        (when (%btree-leaf-node-p disk-cache node)
631          (return (values node offset eq)))
632        (setq node (read-long disk-cache (+ node offset)))
633        (require-satisfies dc-vector-subtype-p
634                           disk-cache node $v_btree-node)))))
635
636; This one calls the disk-cache code directly and accesses the
637; page vector itself so that it can be reasonably fast.
638; Returns same two values as btree-find-leaf-node
639; plus a third value which is the offset to the node just to the left of the found one.
640
641
642#| ; old linear search code
643(defun %btree-search-node (disk-cache node key-string case-sensitive?)
644  (let ((compare-strings-function (if case-sensitive? #'compare-strings #'compare-strings-case-insensitive)))
645    (with-locked-page (disk-cache node nil vec offset bytes)
646      (declare (fixnum offset bytes))
647      (accessing-byte-array (vec offset)
648        (let* ((end (+ offset $btree_data (load.uw $btree_used)))
649               (ptr (+ offset $btree_data 4))
650               (last-ptr nil))
651          (declare (fixnum end ptr))
652          (declare (fixnum offset bytes))
653          (unless (>= (the fixnum (+ offset bytes)) end)
654            (error "End of btree node is past end of disk page"))
655          (loop
656            (if (>= ptr end)
657              (return (values (- ptr offset 4)
658                              nil
659                              (if last-ptr (- last-ptr offset 4)))))
660            (let* ((len (aref vec ptr))
661                   (str (make-string len :element-type 'base-character)))
662              (declare (dynamic-extent str))
663              (%copy-byte-array-portion vec (the fixnum (1+ ptr)) len str 0)
664              ; Here's where we'll eventually use part of the
665              ; $btree_flags to select a sorting predicate.
666              (let ((compare (funcall compare-strings-function key-string str)))
667                (declare (fixnum compare))
668                (when (<= compare 0)
669                  (return (values (- ptr offset 4)
670                                  (eql compare 0)
671                                  (if last-ptr (- last-ptr offset 4))))))
672              (setq last-ptr ptr)
673              (incf ptr (normalize-size (+ 5 len) 4)))))))))
674|#
675
676; New binary search code: Moon's idea.
677(defun %btree-search-node (disk-cache node key-string case-sensitive?)
678  (with-locked-page (disk-cache node nil vec offset)
679    (declare (fixnum offset)
680             (type (simple-array (unsigned-byte 8) (*)) vec))
681    (accessing-byte-array (vec)
682      (let* ((count (load.uw (+ $btree_count offset)))
683             (min 0)                    ; inclusive lower bound
684             (max count)                ; exclusive upper bound
685             (ptrs (make-array count))
686             (lens (make-array count))
687             (fill-pointer 0)           ; unlike Common Lisp, this is an inclusive upper bound
688             (key-len (length key-string))
689             (offset+4 (+ offset 4))
690             (offset+5 (+ offset 5)))
691        (declare (fixnum count min max fill-pointer offset+4 offset+5)
692                 (dynamic-extent ptrs lens)
693                 (simple-vector ptrs lens)
694                 (optimize (speed 3) (safety 0)))
695        (when (eql count 0)
696          (return-from %btree-search-node $btree_data))
697        (setf (svref ptrs 0) $btree_data
698              (svref lens 0) (aref vec (the fixnum (+ $btree_data offset+4))))  ; (load.b (+ $btree_data offset 4))
699        (flet ((get-ptr (index)
700                 (declare (fixnum index))
701                 (if (<= index fill-pointer)
702                   (values (svref ptrs index) (svref lens index))
703                   (let ((p (svref ptrs fill-pointer))
704                         (len (svref lens fill-pointer)))
705                     (declare (fixnum p len))
706                     (loop
707                         (incf p (the fixnum (normalize-size (the fixnum (+ 5 len)) 4)))
708                         (setq len (aref vec (the fixnum (+ p offset+4))))     ;  (load.b (+ p 4 offset))
709                         (setf (svref ptrs (incf fill-pointer)) p)
710                         (setf (svref lens fill-pointer) len)
711                         (when (eql fill-pointer index)
712                           (return (values p len)))))))
713               (compare-strings (s1 i1 end1 s2 i2 end2 case-sensitive?)
714                 (declare (fixnum i1 end1 i2 end2)
715                          (type (simple-array (unsigned-byte 8) (*)) s1 s2)
716                          (optimize (speed 3) (safety 0)))
717                 ; s1 is a simple string and s2 is a (simple-array (unsigned-byte 8) (*))
718                 ; Since these are stored the same way in memory, we can assume
719                 ; That both are (unsigned-byte 8) or both are simple strings and
720                 ; the inline code will work.
721                 ; (Unfortunatedly, PPC MCL does not inline %schar, so we need to use aref
722                 ; instead).
723                 (if case-sensitive?
724                   (loop
725                     (when (>= i1 end1)
726                       (return (if (eql i2 end2) 0 -1)))
727                     (when (>= i2 end2) (return 1))
728                     (let ((c1 (aref s1 i1))
729                           (c2 (aref s2 i2)))
730                       (declare (fixnum c1 c2))
731                       (if (<= c1 c2)
732                         (if (< c1 c2)
733                           (return -1))
734                         (return 1)))
735                     (incf i1)
736                     (incf i2))
737                   (loop
738                     (when (>= i1 end1)
739                       (return (if (eql i2 end2) 0 -1)))
740                     (when (>= i2 end2) (return 1))
741                     (let ((c1 (ccl::%char-code (char-upcase (ccl::%code-char (aref s1 i1)))))
742                           (c2 (ccl::%char-code (char-upcase (ccl::%code-char (aref s2 i2))))))
743                       (declare (fixnum c1 c2))
744                       (if (<= c1 c2)
745                         (if (< c1 c2)
746                           (return -1))
747                         (return 1)))
748                     (incf i1)
749                     (incf i2)))))
750          (declare (inline get-ptr compare-strings))
751          (loop
752            (let ((index (ash (the fixnum (+ min max)) -1)))
753              (declare (fixnum index))
754              (multiple-value-bind (ptr len) (get-ptr index)
755                (declare (fixnum ptr len))
756                (let* ((vec-idx (+ ptr (the fixnum offset+5)))
757                       (vec-end (+ vec-idx len))
758                       (compare (compare-strings key-string 0 key-len vec vec-idx vec-end case-sensitive?)))
759                  (declare (fixnum vec-idx vec-end compare))
760                  (if (<= compare 0)
761                    (progn
762                      (setq max index)
763                      (when (or (eql compare 0) (eql min max))
764                        (return (values ptr
765                                        (eql compare 0)
766                                        (unless (eql index 0)
767                                          (svref ptrs (the fixnum (1- index))))))))
768                    (progn
769                      (setq min (1+ index))
770                      (when (eql min max)
771                        (return (values (the fixnum (+ ptr (normalize-size (the fixnum (+ 5 len)) 4)))
772                                        nil
773                                        ptr))))))))))))))
774
775(defun compare-strings (str1 str2)
776  (cond ((string< str1 str2) -1)
777        ((string= str1 str2) 0)
778        (t 1)))
779
780; Search a node for a pointer to a subnode.
781; Return two values, the offset for the subnode, and the offset
782; for the subnode just before it.
783; If right-node-p is true, return a third value, the offset of the subnode
784; just after subnode.
785(defun %btree-search-for-subnode (disk-cache node subnode &optional right-node-p)
786  (with-locked-page (disk-cache node nil vec offset bytes)
787    (declare (fixnum offset bytes))
788    (accessing-byte-array (vec offset)
789      (let* ((end (+ offset $btree_data (load.uw $btree_used)))
790             (ptr (+ offset $btree_data))
791             (last-ptr nil))
792        (declare (fixnum end ptr))
793        (declare (fixnum offset bytes))
794        (unless (>= (the fixnum (+ offset bytes)) end)
795          (error "End of btree node is past end of disk page"))
796        (accessing-byte-array (vec)
797          (loop
798            (when (eql subnode (load.p ptr))
799              (return (values (- ptr offset)
800                              (if last-ptr (- last-ptr offset))
801                              (when right-node-p
802                                (let ((right-ptr (+ ptr 4)))
803                                  (declare (fixnum right-ptr))
804                                  (unless (>= right-ptr end)
805                                    (incf right-ptr (normalize-size (1+ (load.b right-ptr)) 4))
806                                    (- right-ptr offset)))))))
807            (setq last-ptr ptr)
808            (incf ptr 4)
809            (if (>= ptr end)
810              (return nil))
811            (incf ptr (normalize-size (1+ (load.b ptr)) 4))))))))
812
813; Fill the SIZES array with the sizes of the entries in NODE.
814; If one of the entries is at INSERT-OFFSET, put INSERT-SIZE
815; into SIZES at that index, and return the index.
816; Otherwise, return NIL.
817(defun %lookup-node-sizes (disk-cache node sizes count &optional insert-offset insert-size
818                                      (start 0))
819  (accessing-disk-cache (disk-cache node)
820    (unless count
821      (setq count (load.uw $btree_count)))
822    (when insert-offset (incf count))
823    (let ((p (+ $btree_data 4))
824          (p-at-offset (and insert-offset (+ insert-offset 4)))
825          insert-index
826          (index (require-type start 'fixnum)))
827      (declare (fixnum p))
828      (dotimes (i count)
829        (if (eql p p-at-offset)
830          (setf (aref sizes index) insert-size
831                insert-index index
832                p-at-offset nil)
833          (incf p (setf (aref sizes index) (normalize-size (+ 5 (load.b p)) 4))))
834        (incf index))
835      (when (and insert-offset (null insert-index))
836        (error "Inconsistency: didn't find insert-offset"))
837      (unless (eql p (+ $btree_data (load.uw $btree_used)))
838        (error "Inconsistency: walking node's entries didn't end up at end"))
839      insert-index)))
840
841; When we move entries around in a non-leaf nodes, the parent pointers
842; need to be updated.
843; This will go away if I eliminate the parent pointers and replace
844; them with passing around the ancestor list.
845; Doing this will make insertion and deletion slightly faster
846; at the expense of making it hard to click around in a btree
847; in the inspector.
848(defun %btree-update-childrens-parents (disk-cache node &optional start-ptr end-ptr)
849  (with-locked-page (disk-cache node nil node-buf node-buf-offset)
850    (accessing-byte-array (node-buf)
851      (let* ((used (load.uw (+ node-buf-offset $btree_used)))
852             (p (or start-ptr (+ node-buf-offset $btree_data)))
853             (max-p (or end-ptr (+ node-buf-offset $btree_data used)))
854             child)
855        (declare (fixnum p max-p))
856        (loop
857          (setq child (load.p p))
858          (require-satisfies dc-vector-subtype-p disk-cache child $v_btree-node)
859          (accessing-disk-cache (disk-cache child)
860            (store.p node $btree_parent))
861          (incf p 4)
862          (when (>= p max-p)
863            (unless (eql p max-p)
864              (error "Inconsistency. Node scan went past expected end."))
865            (return))
866          (incf p (normalize-size (+ 1 (load.b p)) 4)))))))
867
868; Update and return the maximum key size
869(defun dc-btree-max-key-size (disk-cache btree &optional new-size)
870  (when new-size
871    (setq new-size (require-type new-size 'fixnum)))
872  (if (<= (dc-uvsize disk-cache btree) $btree.max-key-size)
873    132                                 ; old btrees don't track max key size
874    (let ((size (dc-%svref-fixnum disk-cache btree $btree.max-key-size "$btree.max-key-size")))
875      (if (and new-size (> new-size size))
876        (setf (dc-%svref disk-cache btree $btree.max-key-size t) new-size)
877        size))))
878
879;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
880;;
881;; Here's where the guts of an insert happens.
882;; We know that the key-string belongs at offset from node.
883;; Insert it there if it fits.
884;; Otherwise split this node into two after creating room in each
885;; node in the access path.
886;;
887
888(defun %btree-insert-in-node (disk-cache btree node offset key-string value
889                                             &optional value-imm? (key-length (length key-string)))
890  (accessing-disk-cache (disk-cache node)
891    (let* ((free (load.uw $btree_free))
892           (used (load.uw $btree_used))
893           (size (normalize-size (+ 5 key-length) 4))
894           (max-key-size (dc-btree-max-key-size disk-cache btree size)))        ; update & get max key size
895      (declare (fixnum free used size))
896      (if (> key-length 127)
897        (error "Keys longer than 127 not supported yet."))
898      (when (<= size free)
899        ; Will fit in this node
900        (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
901          (let* ((bytes-to-move (- used (- offset $btree_data)))
902                 (p (+ node-buf-offset offset)))
903            (declare (fixnum bytes-to-move p))
904            (%copy-byte-array-portion node-buf p bytes-to-move
905                                      node-buf (+ p size) node-page)
906            (%store-btree-entry
907             node-buf p node-page
908             key-string key-length value value-imm? size)
909            (accessing-byte-array (node-buf node-buf-offset node-page)
910              (store.w (1+ (load.uw $btree_count)) $btree_count)
911              (store.w (+ used size) $btree_used)
912              (store.w (- free size) $btree_free))))
913        (return-from %btree-insert-in-node nil))
914      ; Won't fit. Split the node
915      (unless (%btree-leaf-node-p disk-cache node)
916        (error "Wasn't room to insert in non-leaf node"))
917      (%create-insertion-path disk-cache btree key-string max-key-size)
918      (%split-node disk-cache btree node free used
919                   t offset key-string value value-imm? key-length size)
920      ; This slightly increases non-leaf node space utilization, but
921      ; at a 10% time penalty, so I've nuked it.
922      ;(%sew-up-insertion-path disk-cache btree node t)
923      )))
924
925(eval-when (:load-toplevel :compile-toplevel :execute)
926
927(defvar *enable-debug-break* nil)
928
929)  ; end of eval-when
930
931(defmacro debug-break (format-string &rest format-args)
932  (when *enable-debug-break*
933    `(%debug-break ,format-string ,@format-args)))
934
935(defun %debug-break (format-string &rest format-args)
936  (when *enable-debug-break*
937    (let ((*print-base* 16.))
938      (apply 'cerror "Continue" format-string format-args))))
939
940; There is an important difference between the leaf and non-leaf nodes.
941; When entries are shifted between leaf nodes, the parent node does not
942; need to be referenced except to update it with the last key in the
943; left-hand leaf node. When non-leaf nodes are shifted, the shifting needs
944; to go through the parent node, i.e. the key in the parent that is between
945; the keys in the left child and the right child does not appear in either child
946; and needs to move to one of them. After we're done, the parent will have
947; a new key that used to be in one of its children but isn't any more.
948
949(defun %split-node (disk-cache btree node free used leaf-p &optional
950                      offset key-string value value-imm? key-length (size 0))
951  (when (%shift-node-left disk-cache btree node free used leaf-p
952                          offset key-string value value-imm? key-length size)
953    (return-from %split-node :shift-left))
954  (when (%shift-node-right disk-cache btree node free used leaf-p
955                          offset key-string value value-imm? key-length size)
956    (return-from %split-node :shift-right))
957  (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
958    (accessing-byte-array (node-buf node-buf-offset node-page)
959      (let* ((parent (load.p $btree_parent))
960             (old-count (load.uw $btree_count))
961             (count (if offset (1+ old-count) old-count))
962             (sizes (make-array count))
963             (insert-index (%lookup-node-sizes disk-cache node sizes old-count offset size))
964             (new-used (+ used size))
965             (last-new-used 0)
966             (new-count 0)
967             (last-size 0)
968             (right-node (dc-cons-btree-node
969                          disk-cache btree parent (if leaf-p (ash 1 $btree_flags.leaf-bit) 0)))
970             (right-used 4)
971             (last-right-used 0)
972             (right-count 0)
973             (last-last-size 0))
974        (declare (fixnum old-count count new-used new-count last-size right-used right-count
975                         last-new-used last-right-used last-last-size)
976                 (dynamic-extent sizes))
977        (loop for i from (1- count) downto 0
978              finally (error "Didn't find a split point")
979              do
980              (setq last-new-used new-used
981                    last-right-used right-used
982                    last-last-size last-size)
983              (if leaf-p
984                (progn
985                  (incf right-used (setq last-size (aref sizes i)))
986                  (decf new-used last-size))
987                (progn
988                  (incf right-used last-last-size)
989                  (decf new-used (setq last-size (aref sizes i)))))
990              (when (>= right-used new-used)
991                (setq new-count i)
992                (let ((diff (- right-used new-used))
993                      (old-diff (- last-new-used last-right-used)))
994                  (when (> diff old-diff)
995                    (setq new-used last-new-used
996                          right-used last-right-used
997                          new-count (1+ i))))
998                (setq last-size (aref sizes (1- new-count)))
999                (return)))
1000        (debug-break "after figuring where to put split")
1001        (with-locked-page (disk-cache right-node t right-buf right-offset nil right-page)
1002          (setq right-count (- count new-count (if leaf-p 0 1)))
1003          (let* ((insert-middle (if leaf-p
1004                                  (eql insert-index (1- new-count))
1005                                  (eql insert-index new-count)))
1006                 (insert-left (and insert-index (< insert-index new-count)))
1007                 (insert-not-right (or (not insert-index) insert-middle insert-left))
1008                 (end-ptr (+ node-buf-offset $btree_data new-used))
1009                 (node-ptr (- end-ptr
1010                              (if insert-left size 0)
1011                              (if leaf-p last-size 0)))
1012                 (last-string-length (if insert-middle
1013                                       key-length
1014                                       (accessing-byte-array (node-buf) (load.b node-ptr))))
1015                 (last-string (make-string last-string-length :element-type 'base-character)))
1016            (declare (fixnum end-ptr node-ptr last-string-length)
1017                     (dynamic-extent last-string))
1018            (debug-break "About to fill last-string")
1019            (if insert-middle
1020              (setq last-string key-string)
1021              (%copy-byte-array-portion node-buf (1+ node-ptr) last-string-length
1022                                        last-string 0))
1023            (if insert-not-right
1024              ; New entry goes in node or there is no new entry & this is a non-leaf node
1025              (let ((bytes-to-shift 0))
1026                (declare (fixnum bytes-to-shift))
1027                (setq node-ptr (- (+ node-buf-offset $btree_data used) right-used))
1028                (debug-break "Before first copy-byte-array-portion")
1029                (%copy-byte-array-portion
1030                 node-buf node-ptr right-used right-buf (+ right-offset $btree_data) right-page)
1031                (when insert-left
1032                  (setq node-ptr (+ node-buf-offset offset)
1033                        bytes-to-shift (- new-used (- offset $btree_data) size))
1034                  (debug-break "About to open up node")
1035                  (%copy-byte-array-portion
1036                   node-buf node-ptr bytes-to-shift node-buf (+ node-ptr size))
1037                  (%store-btree-entry node-buf node-ptr node-page
1038                                      key-string key-length value value-imm? size)))
1039              ; New entry goes in right-node. leaf-p is true.
1040              (let* ((bytes-to-copy (- right-used size))
1041                     (bytes-before-offset (- offset $btree_data (- new-used 4)))
1042                     (bytes-after-offset (- bytes-to-copy bytes-before-offset))
1043                     (right-ptr (+ right-offset $btree_data)))
1044                (declare (fixnum bytes-to-copy bytes-before-offset bytes-after-offset right-ptr))
1045                (setq node-ptr (- (+ node-buf-offset $btree_data used) bytes-to-copy))
1046                (debug-break "Before first copy-byte-array-portion when insert goes right")
1047                (%copy-byte-array-portion
1048                 node-buf node-ptr bytes-before-offset right-buf right-ptr right-page)
1049                (incf node-ptr bytes-before-offset)
1050                (incf right-ptr bytes-before-offset)
1051                (%store-btree-entry right-buf right-ptr right-page
1052                                    key-string key-length value value-imm? size)
1053                (incf right-ptr size)
1054                (%copy-byte-array-portion
1055                 node-buf node-ptr bytes-after-offset right-buf right-ptr right-page)))
1056          (accessing-byte-array (node-buf nil node-page)
1057            (let ((parent-offset (%btree-search-for-subnode disk-cache parent node)))
1058              (unless parent-offset
1059                (error "Couldn't find node ~s in parent node ~s" node parent))
1060              (setq node-ptr (+ node-buf-offset $btree_data new-used -4))
1061              (debug-break "About to fix up node")
1062              (if leaf-p
1063                (store.p right-node node-ptr)
1064                (when insert-middle
1065                  (store.p value node-ptr value-imm?)))
1066              (incf node-ptr 4)
1067              (accessing-disk-cache (disk-cache parent)
1068                (store.p right-node parent-offset))
1069              (fill.b node-ptr 0 (- used new-used))
1070              (debug-break "About to insert in parent node")
1071              (%btree-insert-in-node
1072               disk-cache btree parent parent-offset last-string node nil last-string-length))))
1073          (accessing-byte-array (node-buf node-buf-offset node-page)
1074            (store.w new-count $btree_count)
1075            (store.w (- (+ free used) new-used) $btree_free)
1076            (store.w new-used $btree_used))
1077          (accessing-byte-array (right-buf right-offset right-page)
1078            (store.w right-count $btree_count)
1079            (let ((total (+ (load.uw $btree_used) (load.uw $btree_free))))
1080              (store.w (- total right-used) $btree_free)
1081              (store.w right-used $btree_used)))
1082          (unless leaf-p
1083            (%btree-update-childrens-parents disk-cache right-node))
1084          (debug-break "Done with %split-node")))
1085      :split)))
1086
1087; Attempt to shift the node left enough to make room for the new key-string
1088; Works for non-leaf-nodes as well
1089; Non-leaf nodes are harder since the shifting has to go through the parent.
1090; This function exists because without it the space utilization is only 51%.
1091; There is guaranteed (due to %create-insertion-path) to be enough room in the parent.
1092; Returns true if it succeeded.
1093; Otherwise, makes no changes and returns nil.
1094(defun %shift-node-left (disk-cache btree node free used leaf-p
1095                                       offset key-string value value-imm? key-length size)
1096  ;(let ((*enable-debug-break* (not leaf-p)))
1097  (unless offset
1098    ; If we're not inserting, we need to make room for the maximum node size
1099    (setq size (dc-btree-max-key-size disk-cache btree)))
1100  (unless (%btree-root-node-p disk-cache node)
1101    (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
1102      (declare (fixnum node-buf-offset))
1103      (accessing-byte-array (node-buf node-buf-offset node-page)
1104        (let* ((parent (load.p $btree_parent)))
1105          (multiple-value-bind (node-offset left-offset)
1106                               (%btree-search-for-subnode disk-cache parent node)
1107            (when left-offset
1108              (let* ((left-node (accessing-disk-cache (disk-cache parent) (load.p left-offset)))
1109                     (left-free (accessing-disk-cache (disk-cache left-node) (load.uw $btree_free)))
1110                     (new-left-free left-free)
1111                     (left-used (accessing-disk-cache (disk-cache left-node) (load.uw $btree_used)))
1112                     (new-left-used left-used)
1113                     (new-free free)
1114                     (new-used used)
1115                     (count (load.uw $btree_count))
1116                     (sizes (make-array (the fixnum (1+ count))))
1117                     (bytes-moved 0)
1118                     (bytes-received 0)
1119                     (bytes-needed (- size free))
1120                     (bytes-to-offset (if offset (- offset $btree_data) most-positive-fixnum))
1121                     (count-diff 0)
1122                     (last-shifted-entry-size 0)
1123                     (initial-parent-size (- node-offset left-offset))
1124                     (parent-size initial-parent-size)
1125                     (new-parent-size (if leaf-p 0 parent-size)))
1126                (declare (fixnum left-free new-left-free left-used new-left-used
1127                                 free new-free used new-used count
1128                                 bytes-moved bytes-needed bytes-to-offset
1129                                 count-diff last-shifted-entry-size
1130                                 initial-parent-size parent-size new-parent-size)
1131                         (dynamic-extent sizes))
1132                (%lookup-node-sizes disk-cache node sizes count)
1133                (with-locked-page (disk-cache left-node t left-buf left-buf-offset nil left-page)
1134                  (declare (fixnum left-buf-offset))
1135                  (with-locked-page (disk-cache parent t parent-buf parent-buf-offset nil parent-page)
1136                    (declare (fixnum parent-buf-offset))
1137                    (labels ((shift-left (new-entry-in-node)
1138                               (decf new-used bytes-moved)
1139                               (incf new-free bytes-moved)
1140                               (incf new-left-used bytes-received)
1141                               (decf new-left-free bytes-received)
1142                               (let* ((node-ptr (+ node-buf-offset $btree_data))
1143                                      (left-ptr (+ left-buf-offset $btree_data left-used -4))
1144                                      (bytes-to-last-string (+ (- bytes-moved last-shifted-entry-size) 4))
1145                                      (bytes-to-copy (if (or leaf-p (not new-entry-in-node))
1146                                                       bytes-moved
1147                                                       bytes-to-last-string)))
1148                                 (declare (fixnum node-ptr left-ptr bytes-to-last-string bytes-to-copy))
1149                                 (debug-break "About to copy bytes left")
1150                                 (unless leaf-p
1151                                   (let ((parent-ptr (+ left-offset parent-buf-offset 4))
1152                                         (parent-bytes (- parent-size 4)))
1153                                     (declare (fixnum parent-ptr parent-bytes))
1154                                     (incf left-ptr 4)
1155                                     (%copy-byte-array-portion parent-buf parent-ptr parent-bytes
1156                                                               left-buf left-ptr left-page)
1157                                     (incf left-ptr parent-bytes)))
1158                                 (%copy-byte-array-portion node-buf node-ptr bytes-to-copy
1159                                                           left-buf left-ptr left-page)
1160                                 (incf left-ptr bytes-to-copy)
1161                                 (when leaf-p
1162                                   (accessing-byte-array (left-buf nil left-page)
1163                                     (store.p node left-ptr)))
1164                                 (if new-entry-in-node
1165                                   (let ((node-ptr (+ node-ptr bytes-to-last-string))
1166                                         (parent-ptr (open-parent last-shifted-entry-size)))
1167                                     (declare (fixnum node-ptr parent-ptr))
1168                                     (debug-break "About to copy node to parent")
1169                                     (%copy-byte-array-portion node-buf node-ptr (- last-shifted-entry-size 4)
1170                                                               parent-buf parent-ptr parent-page))
1171                                   (let* ((parent-ptr (open-parent size))
1172                                          (key-size (1+ key-length))
1173                                          (fill-count (- (normalize-size key-size 4) key-size)))
1174                                     (declare (fixnum parent-ptr key-size fill-count))
1175                                     (debug-break "About to enter key-string in parent")
1176                                     (accessing-byte-array (parent-buf 0 parent-page)
1177                                       (store.b key-length parent-ptr))
1178                                     (incf parent-ptr)
1179                                     (%copy-byte-array-portion key-string 0 key-length
1180                                                               parent-buf parent-ptr  parent-page)
1181                                     (incf parent-ptr key-length)
1182                                     (unless (eql 0 fill-count)
1183                                       (accessing-byte-array (parent-buf 0 parent-page)
1184                                         (fill.b parent-ptr 0 fill-count)))))
1185                                 (debug-break "About to shift node contents left")
1186                                 (%copy-byte-array-portion node-buf (+ node-ptr bytes-moved) new-used
1187                                                           node-buf node-ptr node-page)
1188                                 (incf node-ptr new-used)
1189                                 (accessing-byte-array (node-buf nil node-page)
1190                                   (fill.b node-ptr 0 bytes-moved))
1191                                 (debug-break "Exiting shift-left")
1192                                 ))
1193                             (update-free-and-used ()
1194                               (accessing-byte-array (node-buf node-buf-offset node-page)
1195                                 (store.w new-used $btree_used)
1196                                 (store.w new-free $btree_free)
1197                                 (store.w (- (load.uw $btree_count) count-diff) $btree_count))
1198                               (accessing-byte-array (left-buf left-buf-offset left-page)
1199                                 (store.w new-left-used $btree_used)
1200                                 (store.w new-left-free $btree_free)
1201                                 (store.w (+ (load.uw $btree_count) count-diff) $btree_count))
1202                               (unless leaf-p
1203                                 (%btree-update-childrens-parents
1204                                  disk-cache left-node (+ left-buf-offset $btree_data left-used initial-parent-size -4)))
1205                               (debug-break "Free and used updated")
1206                               )
1207                             (open-parent (size)
1208                               (accessing-byte-array (parent-buf parent-buf-offset parent-page)
1209                                 (let* ((old-size (- node-offset left-offset))
1210                                        (size-diff (- old-size size))
1211                                        (parent-used (load.uw $btree_used))
1212                                        (parent-free (load.uw $btree_free))
1213                                        (ptr (+ parent-buf-offset node-offset))
1214                                        (bytes-to-move (- parent-used (- node-offset $btree_data))))
1215                                   (declare (fixnum old-size size-diff parent-used parent-free
1216                                                    ptr bytes-to-move))
1217                                   (unless (eql size-diff 0)
1218                                     (debug-break "About to shift parent tail")
1219                                     (%copy-byte-array-portion
1220                                      parent-buf ptr bytes-to-move
1221                                      parent-buf (- ptr size-diff) parent-page)
1222                                     (when (> size-diff 0)
1223                                       (incf ptr (- bytes-to-move size-diff))
1224                                       (accessing-byte-array (parent-buf nil parent-page)
1225                                         (fill.b ptr 0 size-diff)))
1226                                     (store.w (- parent-used size-diff) $btree_used)
1227                                     (store.w (+ parent-free size-diff) $btree_free))
1228                                   (+ parent-buf-offset left-offset 4)))))
1229                      (declare (dynamic-extent #'shift-left #'update-free-and-used
1230                                               #'open-parent))
1231                      (dotimes (i count (error "Didn't run over offset"))
1232                        (when (>= bytes-received left-free)
1233                          ; Ran out of room in left node
1234                          (return nil))
1235                        (when (>= bytes-moved bytes-needed)
1236                          ; The new entry now fits in node
1237                          (setq count-diff i)
1238                          (shift-left t)
1239                          (update-free-and-used)
1240                          (when offset
1241                            (%btree-insert-in-node disk-cache btree node (- offset bytes-moved)
1242                                                   key-string value value-imm? key-length))
1243                          (debug-break "Done with insertion in node")
1244                          ;                       #+bill (check-btree-consistency disk-cache btree)
1245                          (return t))
1246                        (when (>= bytes-moved bytes-to-offset)
1247                          (unless (eql bytes-moved bytes-to-offset)
1248                            (error "Inconsistency: offset was not at an entry boundary"))
1249                          (unless (or (not leaf-p) (>= new-left-free (+ size bytes-received)))
1250                            (debug-break "Couldn't shift left")
1251                            (return nil))
1252                          ; The new entry fits at the end of left-node
1253                          (setq count-diff i)
1254                          (shift-left nil)
1255                          (when leaf-p
1256                            (let ((left-ptr (+ left-buf-offset $btree_data new-left-used -4)))
1257                              (declare (fixnum left-ptr))
1258                              (debug-break "Storing new entry in left neighbor")
1259                              (%store-btree-entry left-buf left-ptr left-page
1260                                                  key-string key-length value value-imm? size)
1261                              (incf left-ptr size)
1262                              (accessing-byte-array (left-buf)
1263                                (store.p node left-ptr))
1264                              (incf new-left-used size)
1265                              (decf new-left-free size)
1266                              (accessing-byte-array (left-buf left-buf-offset)
1267                                (store.w (1+ (load.uw $btree_count)) $btree_count))))
1268                          (update-free-and-used)
1269                          (debug-break "Done with insertion in left neighbor")
1270                          ;#+bill (check-btree-consistency disk-cache btree)
1271                          (return t))
1272                        (setq last-shifted-entry-size (aref sizes i))
1273                        (if leaf-p
1274                          (progn
1275                            (incf bytes-moved last-shifted-entry-size)
1276                            (incf bytes-received last-shifted-entry-size))
1277                          (progn
1278                            (incf bytes-moved last-shifted-entry-size)
1279                            (incf bytes-received new-parent-size)
1280                            (setq new-parent-size last-shifted-entry-size)))))))))))))))
1281
1282
1283; Attempt to shift the node right enough to make room for the new key-string
1284; This is necessary because inserting in reverse order foils %shift-node-left
1285; This doesn't handle the non-leaf case yet. It's hardly worth it.
1286(defun %shift-node-right (disk-cache btree node free used leaf-p
1287                             offset key-string value value-imm? key-length size)
1288  (declare (fixnum free used offset key-length))
1289  ;(return-from %shift-node-right nil)   ; not yet debugged.
1290  (when (and leaf-p (not (%btree-root-node-p disk-cache node)))
1291    (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
1292      (accessing-byte-array (node-buf node-buf-offset node-page)
1293        (let* ((parent (load.p $btree_parent)))
1294          (multiple-value-bind (node-offset left-offset right-offset)
1295                               (%btree-search-for-subnode disk-cache parent node t)
1296            (declare (ignore left-offset))
1297            (when right-offset
1298              (let* ((right-node (accessing-disk-cache (disk-cache parent) (load.p right-offset)))
1299                     (right-free (accessing-disk-cache (disk-cache right-node) (load.uw $btree_free)))
1300                     (new-right-free right-free)
1301                     (right-used (accessing-disk-cache (disk-cache right-node) (load.uw $btree_used)))
1302                     (new-right-used right-used)
1303                     (new-free free)
1304                     (new-used used)
1305                     (count (load.uw $btree_count))
1306                     (sizes (make-array (the fixnum (1+ count))))
1307                     (bytes-moved 0)
1308                     (bytes-needed (- size free))
1309                     (bytes-to-offset (- used (- offset $btree_data) 4))
1310                     (count-diff 0))
1311                (declare (fixnum right-length right-offset right-free new-right-free
1312                                 right-used new-right-used new-free new-used count
1313                                 bytes-moved bytes-needed bytes-to-offset
1314                                 count-diff last-shifted-entry-size)
1315                         (dynamic-extent sizes))
1316                (%lookup-node-sizes disk-cache node sizes count)
1317                (with-locked-page (disk-cache right-node t right-buf right-buf-offset nil right-page)
1318                  (flet ((shift-right (&optional (new-entry-size 0))
1319                           (decf new-used bytes-moved)
1320                           (incf new-free bytes-moved)
1321                           (incf new-right-used bytes-moved)
1322                           (decf new-right-free bytes-moved)
1323                           (let ((node-ptr (+ node-buf-offset $btree_data new-used -4))
1324                                 (right-ptr (+ right-buf-offset $btree_data)))
1325                             (declare (fixnum node-ptr right-ptr))
1326                             (debug-break "About to copy bytes right")
1327                             (%copy-byte-array-portion right-buf right-ptr right-used
1328                                                       right-buf
1329                                                       (+ right-ptr new-entry-size bytes-moved)
1330                                                       right-page)
1331                             (incf right-ptr new-entry-size)
1332                             (%copy-byte-array-portion node-buf node-ptr bytes-moved
1333                                                       right-buf right-ptr right-page)
1334                             (accessing-byte-array (node-buf nil node-page)
1335                               (store.p right-node node-ptr))
1336                             (incf node-ptr 4)
1337                             (accessing-byte-array (node-buf nil node-page)
1338                               (fill.b node-ptr 0 bytes-moved))
1339                             (debug-break "Exiting shift-right")
1340                             ))
1341                         (update-free-and-used ()
1342                           (accessing-byte-array (node-buf node-buf-offset node-page)
1343                             (store.w new-used $btree_used)
1344                             (store.w new-free $btree_free)
1345                             (store.w (- (load.uw $btree_count) count-diff) $btree_count))
1346                           (accessing-byte-array (right-buf right-buf-offset right-page)
1347                             (store.w new-right-used $btree_used)
1348                             (store.w new-right-free $btree_free)
1349                             (store.w (+ (load.uw $btree_count) count-diff) $btree_count))
1350                           (debug-break "Free and used updated")
1351                           )
1352                         (replace-parent-entry (i &optional string (size (aref sizes i)))
1353                           (declare (fixnum size))
1354                           (with-locked-page (disk-cache parent t parent-buf parent-buf-offset nil parent-page)
1355                             (accessing-byte-array (parent-buf parent-buf-offset parent-page)
1356                               (let* ((last-entry-ptr (+ node-buf-offset $btree_data
1357                                                         (- new-used size)))
1358                                      (length (if string
1359                                                (length string)
1360                                                (accessing-byte-array (node-buf)
1361                                                  (load.b last-entry-ptr))))
1362                                      (temp-string (make-string length :element-type 'base-character)))
1363                                 (declare (fixnum last-entry-ptr length)
1364                                          (dynamic-extent temp-string))
1365                                 (unless string
1366                                   (debug-break "Filling string")
1367                                   (setq string temp-string)
1368                                   (%copy-byte-array-portion node-buf (1+ last-entry-ptr) length
1369                                                             string 0))
1370                                 (let* ((old-size (- right-offset node-offset))
1371                                        (size-diff (- old-size size))
1372                                        (ptr (+ parent-buf-offset right-offset)))
1373                                   (declare (fixnum old-size size-diff ptr))
1374                                   (unless (eql size-diff 0)
1375                                     (let* ((parent-used (load.uw $btree_used))
1376                                            (parent-free (load.uw $btree_free))
1377                                            (bytes-to-move (- parent-used (- right-offset $btree_data))))
1378                                       (declare (fixnum parent-used parent-free bytes-to-move))
1379                                       (debug-break "About to shift parent tail")
1380                                       (%copy-byte-array-portion
1381                                        parent-buf ptr bytes-to-move
1382                                        parent-buf (- ptr size-diff) parent-page)
1383                                       (when (> size-diff 0)
1384                                         (incf ptr (- bytes-to-move size-diff))
1385                                         (accessing-byte-array (parent-buf nil parent-page)
1386                                           (fill.b ptr 0 size-diff)))
1387                                       (store.w (- parent-used size-diff) $btree_used)
1388                                       (store.w (+ parent-free size-diff) $btree_free)))
1389                                   (setq ptr (+ parent-buf-offset node-offset))
1390                                   (debug-break "About to enter node string in parent")
1391                                   (%store-btree-entry parent-buf ptr parent-page
1392                                                       string length (load.p node-offset) nil size)))))))
1393                    (declare (dynamic-extent #'shift-right #'update-free-and-used
1394                                             #'replace-parent-entry))
1395                    (loop for i from (1- count) downto 0 do
1396                          (when (>= bytes-moved right-free)
1397                            ; Ran out of room in right node
1398                            (return nil))
1399                          (when (>= bytes-moved bytes-needed)
1400                            ; The new entry now fits in node
1401                            (setq count-diff (- count i 1))
1402                            (shift-right)
1403                            (update-free-and-used)
1404                            (if (eql (- offset $btree_data) (- new-used 4))
1405                              (replace-parent-entry i key-string size)
1406                              (replace-parent-entry i))
1407                            (%btree-insert-in-node disk-cache btree node offset
1408                                                   key-string value value-imm? key-length)
1409                            (debug-break "Done with insertion in node")
1410                            ;                            #+bill (check-btree-consistency disk-cache btree)
1411                            (return t))
1412                          (when (>= bytes-moved bytes-to-offset)
1413                            (unless (eql bytes-moved bytes-to-offset)
1414                              (error "Inconsistency: offset was not at an entry boundary"))
1415                            (unless (>= new-right-free (+ size bytes-moved))
1416                              (debug-break "Couldn't shift right")
1417                              (return nil))
1418                            ; The new entry fits at the beginning of right-node
1419                            (setq count-diff (- count i 1))
1420                            (shift-right size)
1421                            (let ((right-ptr (+ right-buf-offset $btree_data)))
1422                              (declare (fixnum right-ptr))
1423                              (debug-break "Storing new entry in right neighbor")
1424                              (%store-btree-entry right-buf right-ptr right-page
1425                                                  key-string key-length value value-imm? size)
1426                              (incf new-right-used size)
1427                              (decf new-right-free size)
1428                              (update-free-and-used)
1429                              (accessing-byte-array (right-buf right-buf-offset)
1430                                (store.w (1+ (load.uw $btree_count)) $btree_count))
1431                              (replace-parent-entry i)
1432                              (debug-break "Done with insertion in right neighbor")
1433                              ;                          #+bill (check-btree-consistency disk-cache btree)
1434                              (return t)))
1435                          (incf bytes-moved (aref sizes i)))))))))))))
1436
1437; Much like btree-find-leaf-node, but it makes sure there's room
1438; for an entry of max-key-size in every node on the way to the leaf.
1439; This is simpler than letting node splits "bubble up" and it also
1440; works better in a multi-processing environment (this code does
1441; not yet work in a multi-processing environment, but using this
1442; algorithm will allow multiple processes to access a single btree
1443; at the same time).
1444(defun %create-insertion-path (disk-cache btree key-string max-key-size)
1445  (let ((node (dc-%svref disk-cache btree $btree.root))
1446        (case-sensitive? (not (logbitp $btree-type_string-equal-bit
1447                                       (dc-%svref disk-cache btree $btree.type)))))
1448    (loop
1449      (accessing-disk-cache (disk-cache node)
1450        (let* ((offset (%btree-search-node
1451                        disk-cache node key-string case-sensitive?))
1452               (flags (load.w $btree_flags))
1453               (leaf-p (logbitp $btree_flags.leaf-bit flags))
1454               (root-p (logbitp $btree_flags.root-bit flags)))
1455          (when leaf-p
1456            (when root-p
1457              (%make-new-root-node disk-cache btree node))
1458            (return t))
1459          (let ((free (load.uw $btree_free)))
1460            (if (>= free max-key-size)
1461              (setq node (read-long disk-cache (+ node offset)))
1462              (progn
1463                (when root-p
1464                  (%make-new-root-node disk-cache btree node))
1465                (%split-node disk-cache btree node free (load.uw $btree_used) nil)
1466                (setq node (load.p $btree_parent))))))     ; may have moved to new right neighbor
1467        (require-satisfies dc-vector-subtype-p
1468                           disk-cache node $v_btree-node)))))
1469
1470; node is the current root node. Creates a new root node
1471; with node as its only child.
1472(defun %make-new-root-node (disk-cache btree node)
1473  (let ((root (dc-cons-btree-node disk-cache btree btree (ash 1 $btree_flags.root-bit))))
1474    (accessing-disk-cache (disk-cache root)
1475      (store.p node $btree_data))
1476    (setf (dc-%svref disk-cache btree $btree.root) root)
1477    (setf (dc-%svref disk-cache btree $btree.depth t)
1478          (1+ (dc-%svref disk-cache btree $btree.depth)))
1479    (accessing-disk-cache (disk-cache node)
1480      (store.p root $btree_parent)
1481      (store.w (logandc1 (ash 1 $btree_flags.root-bit) (load.w $btree_flags))
1482               $btree_flags))))
1483
1484; This will only ever do anything if you have large keys
1485; Maybe it's not worth the effort, but it doesn't happen very often
1486; (unless there are large keys, and then it's worth it)
1487(defun %sew-up-insertion-path (disk-cache btree node leaf-p)
1488  (accessing-disk-cache (disk-cache node)
1489    (let* ((free (load.uw $btree_free))
1490           (used (load.uw $btree_used))
1491           (count (load.uw $btree_count))
1492           (unmerged-node (%btree-merge-with-neighbors
1493                           disk-cache btree node free used count leaf-p)))
1494      (when unmerged-node
1495        (accessing-disk-cache (disk-cache unmerged-node)
1496          (%sew-up-insertion-path disk-cache btree (load.p $btree_parent) nil))))))
1497
1498;; Store a single entry into a buffer.
1499(defun %store-btree-entry (buf offset page string string-length value value-imm? &optional size)
1500  (declare (fixnum offset string-length))
1501  (let ((p offset))
1502    (declare (fixnum p))
1503    (accessing-byte-array (buf nil page)
1504      (store.p value p value-imm?)
1505      (store.b string-length (incf p 4))
1506      (store.string string (incf p 1) string-length)
1507      (incf p string-length)
1508      (let* ((bytes (+ 5 string-length))
1509             (filler (- (or size (setq size (normalize-size bytes 4)))
1510                        bytes)))
1511        (declare (fixnum bytes filler))
1512        (when (> filler 0)
1513          ; This is for us poor humans.
1514          (fill.b p 0 filler)))))
1515  size)
1516
1517
1518;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1519;;
1520;; Code to support deletion
1521;;
1522
1523(defun %btree-delete-from-node (disk-cache btree node offset leaf-p)
1524  (declare (fixnum offset))
1525  (with-locked-page (disk-cache node t node-buf node-buf-offset nil node-page)
1526    (accessing-byte-array (node-buf node-buf-offset node-page)
1527      (let* ((size (normalize-size (+ 5 (load.b (+ offset 4))) 4))
1528             (free (load.uw $btree_free))
1529             (new-free (+ free size))
1530             (used (load.uw $btree_used))
1531             (new-used (- used size))
1532             (count (load.uw $btree_count))
1533             (new-count (1- count))
1534             (p (+ node-buf-offset offset))
1535             (bytes-to-copy (- used size (- offset $btree_data))))
1536        (declare (fixnum size free new-free used new-used count new-count p bytes-to-copy))
1537        (debug-break "About to delete from node")
1538        (%copy-byte-array-portion node-buf (+ p size) bytes-to-copy
1539                                  node-buf p node-page)
1540        (fill.b (+ $btree_data new-used) 0 size)
1541        (store.w new-free $btree_free)
1542        (store.w new-used $btree_used)
1543        (store.w new-count $btree_count)
1544        (debug-break "Deleted from node")
1545        (%btree-merge-with-neighbors
1546         disk-cache btree node new-free new-used new-count leaf-p)))))
1547
1548; Returns the last node on the way up the parent chain that could not be
1549; merged with either neighbor, or NIL if it got all the way to the root.
1550
1551(defun %btree-merge-with-neighbors (disk-cache btree node free used count leaf-p)
1552  (if (%btree-root-node-p disk-cache node)
1553    (when (eql 0 count)
1554      ; delete the root node, unless it is also the leaf node
1555      (unless (%btree-leaf-node-p disk-cache node)
1556        (accessing-disk-cache (disk-cache node)
1557          (unless (eql 4 (load.uw $btree_used))
1558            (error "Inconsistency: root should be empty, but isn't"))
1559          (let ((new-root (load.p $btree_data)))
1560            (debug-break "About to install new root")
1561            (setf (dc-%svref disk-cache btree $btree.root) new-root)
1562            (setf (dc-%svref disk-cache btree $btree.depth t)
1563                  (1- (dc-%svref disk-cache btree $btree.depth)))
1564            (accessing-disk-cache (disk-cache new-root)
1565              (store.p btree $btree_parent)
1566              (store.w (logior (ash 1 $btree_flags.root-bit)
1567                               (load.uw $btree_flags))
1568                       $btree_flags))
1569            (debug-break "Installed new root")
1570            (dc-free-btree-node disk-cache btree node)
1571            nil))))
1572    (let ((parent (accessing-disk-cache (disk-cache node) (load.p $btree_parent))))
1573      (multiple-value-bind (node-offset left-offset) (%btree-search-for-subnode disk-cache parent node)
1574        (unless node-offset
1575          (error "Inconsistency: didn't find node: ~s in parent: ~s" node parent))
1576        (accessing-disk-cache (disk-cache parent)
1577          (when left-offset
1578            (let ((left-node (load.p left-offset))
1579                  ; Initializes parent size to -4 if leaf-p, which is correct since
1580                  ; when merging two nodes, the pointer from the left node to the
1581                  ; right node is eliminated, making the data take 4 bytes less.
1582                  (parent-size (- (if leaf-p 0 (- node-offset left-offset)) 4)))
1583              (declare (fixnum parent-size))
1584              (accessing-disk-cache (disk-cache left-node)
1585                (let ((left-free (load.uw $btree_free)))
1586                  (declare (fixnum left-free))
1587                  (when (>= left-free (+ used parent-size))
1588                    (debug-break "About to merge with left neighbor")
1589                    (return-from %btree-merge-with-neighbors
1590                      (%btree-merge-nodes
1591                       disk-cache btree left-node node parent left-offset node-offset leaf-p)))))))
1592          (let ((end-offset (+ $btree_data -4 (load.uw $btree_used))))
1593            (declare (fixnum end-offset))
1594            (when (< node-offset end-offset)
1595              (let* ((right-length (load.b (+ node-offset 4)))
1596                     (right-offset (+ node-offset (normalize-size (+ 5 right-length) 4)))
1597                     (right-node (load.p right-offset))
1598                     (parent-size (- (if leaf-p 0 (- right-offset node-offset)) 4)))
1599                (declare (fixnum right-offset parent-size))
1600                (accessing-disk-cache (disk-cache right-node)
1601                  (let ((right-used (load.uw $btree_used)))
1602                    (when (>= free (+ right-used parent-size))
1603                      (debug-break "About to merge with right neighbor")
1604                      (return-from %btree-merge-with-neighbors
1605                        (%btree-merge-nodes
1606                         disk-cache btree node right-node parent node-offset right-offset leaf-p))))))))
1607          node)))))
1608
1609; We know that there's room to merge the nodes. Do it.
1610; It's important that this code merges into the LEFT node as that ensures that the
1611; first leaf node remains constant (the btree points at it and p-map-btree relies on that fact).
1612; (You could instead update $btree.first-leaf as necessary).
1613(defun %btree-merge-nodes (disk-cache btree left-node right-node parent left-offset right-offset leaf-p)
1614  (declare (fixnum left-offset right-offset))
1615  (with-locked-page (disk-cache parent nil parent-buf parent-buf-offset)
1616    (with-locked-page (disk-cache left-node t left-buf left-buf-offset nil left-page)
1617      (with-locked-page (disk-cache right-node nil right-buf right-buf-offset)
1618        (accessing-byte-array (left-buf left-buf-offset)
1619          (let* ((p (+ left-buf-offset $btree_data (load.uw $btree_used)))
1620                 (right-used (accessing-byte-array (right-buf right-buf-offset)
1621                               (load.uw $btree_used)))
1622                 (count-inc (accessing-byte-array (right-buf right-buf-offset)
1623                              (load.uw $btree_count)))
1624                 (used-inc right-used))
1625            (declare (fixnum p right-used count-inc used-inc))
1626            (if leaf-p
1627              (progn
1628                (decf p 4)
1629                (decf used-inc 4))
1630              (let ((size (- right-offset left-offset 4)))
1631                (declare (fixnum size))
1632                (debug-break "About to copy parent info")
1633                (%copy-byte-array-portion parent-buf (+ parent-buf-offset left-offset 4) size
1634                                          left-buf p left-page)
1635                (incf used-inc size)
1636                (incf count-inc)
1637                (incf p size)))
1638            (debug-break "About to copy right-buf info")
1639            (%copy-byte-array-portion right-buf (+ right-buf-offset $btree_data) right-used
1640                                      left-buf p left-page)
1641            (store.w (+ (load.uw $btree_used) used-inc) $btree_used)
1642            (store.w (- (load.uw $btree_free) used-inc) $btree_free)
1643            (store.w (+ (load.uw $btree_count) count-inc) $btree_count)
1644            (dc-free-btree-node disk-cache btree right-node)
1645            (accessing-byte-array (parent-buf parent-buf-offset)
1646              (store.p left-node right-offset))
1647            (unless leaf-p
1648              (%btree-update-childrens-parents disk-cache left-node p))
1649            (debug-break "Nodes merged")
1650            (%btree-delete-from-node disk-cache btree parent left-offset nil)))))))
1651
1652;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1653;;;
1654;;; Consistency checking and statistics gathering
1655;;;
1656
1657(defun check-btree (btree)
1658  (check-btree-consistency (pheap-disk-cache (pptr-pheap btree)) (pptr-pointer btree)))
1659
1660; Checks consistency and returns how full the btree is.
1661(defun check-btree-consistency (disk-cache btree &optional check-nodes-and-count?)
1662  (let ((root (accessing-disk-cache (disk-cache)
1663                (svref.p btree $btree.root))))
1664    (multiple-value-bind (free used nodes count leaf-free leaf-used leaf-nodes non-leaf-free non-leaf-used non-leaf-nodes)
1665                         (check-btree-node-consistency disk-cache root btree)
1666      (when check-nodes-and-count?
1667        (let ((missing-nodes (- (accessing-disk-cache (disk-cache)
1668                                  (svref.p btree $btree.nodes))
1669                                nodes)))
1670          (unless (eql missing-nodes 0)
1671            (cerror "Continue" "~d. missing nodes" missing-nodes)))
1672        (let ((missing-entries (- (accessing-disk-cache (disk-cache)
1673                                    (svref.p btree $btree.count))
1674                                  count)))
1675          (unless (eql 0 missing-entries)
1676            (cerror "Continue" "~d. missing entries" missing-entries))))
1677      (values (if (eql used 0) 0 (/ used (float (+ free used))))
1678              nodes
1679              count
1680              (if (eql leaf-used 0) 0 (/ leaf-used (float (+ leaf-free leaf-used))))
1681              leaf-nodes
1682              (if (eql non-leaf-used 0) 0 (/ non-leaf-used (float (+ non-leaf-free non-leaf-used))))
1683              non-leaf-nodes))))
1684
1685(defun check-btree-node-consistency (disk-cache node parent)
1686  (require-satisfies dc-vector-subtype-p disk-cache node $v_btree-node)
1687  (accessing-disk-cache (disk-cache node)
1688    (let* ((vector-size (%vector-size.p node))
1689           (free (load.uw $btree_free))
1690           (used (load.uw $btree_used))
1691           (count (load.uw $btree_count))
1692           (nodes 1)
1693           (sizes (make-array (the fixnum (1+ count))))
1694           (leaf? (%btree-leaf-node-p disk-cache node))
1695           (total-count (if leaf? count 0))
1696           (p $btree_data)
1697           (leaf-free 0)
1698           (leaf-used 0)
1699           (leaf-nodes 0)
1700           (non-leaf-free 0)
1701           (non-leaf-used 0)
1702           (non-leaf-nodes 0))
1703      (declare (fixnum free used count p leaf-free leaf-used leaf-nodes non-leaf-free non-leaf-used non-leaf-nodes)
1704               (dynamic-extent sizes))
1705      (if leaf?
1706        (setq leaf-free free leaf-used used leaf-nodes 1)
1707        (setq non-leaf-free free non-leaf-used used non-leaf-nodes 1))
1708      (unless (eql parent (load.l $btree_parent))
1709        (error "parent should be: #x~x, was: #x~x" parent (load.l $btree_parent)))
1710      (unless (eql (- vector-size (- $btree_data $v_data)) (+ free used))
1711        (cerror "Continue."
1712                "~&(+ free used) is wrong. Node: #x~x, free: #x~x, used: #x~x~%"
1713                node free used))
1714      (%lookup-node-sizes disk-cache node sizes count)
1715      (setf (aref sizes count) 0)
1716      (unless leaf?
1717        (dotimes (i (1+ count))
1718          (multiple-value-bind (c-free c-used c-nodes c-count
1719                                       c-leaf-free c-leaf-used c-leaf-nodes
1720                                       c-non-leaf-free c-non-leaf-used c-non-leaf-nodes)
1721                               (check-btree-node-consistency disk-cache (load.l p) node)
1722            (incf free c-free)
1723            (incf used c-used)
1724            (incf leaf-free c-leaf-free)
1725            (incf leaf-used c-leaf-used)
1726            (incf leaf-nodes c-leaf-nodes)
1727            (incf non-leaf-free c-non-leaf-free)
1728            (incf non-leaf-used c-non-leaf-used)
1729            (incf nodes c-nodes)
1730            (incf non-leaf-nodes c-non-leaf-nodes)
1731            (incf total-count c-count))
1732          (incf p (aref sizes i))))
1733      (values free used nodes total-count leaf-free leaf-used leaf-nodes non-leaf-free non-leaf-used non-leaf-nodes))))
1734
1735
1736#|
1737; Test code. Stores symbols in a btree.
1738(defun init-temp-btree ()
1739  (declare (special pheap dc b))
1740  (when (boundp 'pheap)
1741    (close-pheap pheap))
1742  (delete-file "temp.pheap")
1743  (create-pheap "temp.pheap")
1744  (setq pheap (open-pheap "temp.pheap")
1745        dc (pheap-disk-cache pheap))
1746  (dolist (w (windows :class 'inspector::inspector-window))
1747    (window-close w))
1748  (setq b (dc-make-btree dc))
1749  #+ignore
1750  (let ((w (inspect dc)))
1751    (set-view-size w #@(413 384))
1752    (scroll-to-address (inspector::inspector-view w) (dc-%svref dc b $btree.root))))
1753
1754(defvar *symbols* nil)
1755(defvar *value-offset* 0)
1756
1757(defun *symbols* ()
1758  (let ((syms *symbols*))
1759    (unless syms
1760      (let ((hash (make-hash-table :test 'equal)))
1761        (do-symbols (s)
1762          (unless (gethash (symbol-name s) hash)
1763            (setf (gethash (symbol-name s) hash) t)
1764            (push s syms))))
1765      (setq *symbols* syms
1766            *value-offset* 0))
1767    syms))
1768
1769(defun store-symbols (&optional (step-sym 0) check? (check-sym 0))
1770  (declare (special dc b))
1771  (let ((syms (*symbols*))
1772        (check-check-sym? nil)
1773        (i 0))
1774    (dolist (s syms)
1775      (let ((string (symbol-name s))
1776            (value (+ i *value-offset*)))
1777        (if (eq s step-sym)
1778          (step
1779           (dc-btree-store dc b string (require-type value 'fixnum) t))
1780          (dc-btree-store dc b string (require-type value 'fixnum) t))
1781        (when (eql s check-sym) (setq check-check-sym? i))
1782        (incf i)
1783        (when check-check-sym?
1784          (unless (eql check-check-sym? (dc-btree-lookup dc b (symbol-name check-sym)))
1785            (cerror "Continue" "Can't find ~s" check-sym)))
1786        (when (and check? (or (not (fixnump check?))
1787                              (eql 0 (mod i check?))))
1788          (format t "~&Checking ~d..." i)
1789          (check-symbols s)
1790          (terpri))))
1791    i))
1792
1793(defun check-symbols (&optional (upto-and-including 0))
1794  (declare (special dc b))
1795  (let ((i 0))
1796    (dolist (s (*symbols*))
1797      (let ((was (dc-btree-lookup dc b (symbol-name s)))
1798            (value (+ i *value-offset*)))
1799        (unless (eql was value)
1800          (cerror "Continue"
1801                  "Sym: ~s, was: ~s, sb: ~s" s was value))
1802        (incf i)
1803        (when (eq s upto-and-including)
1804          (return))))
1805    i))
1806
1807(defun delete-symbols (&optional (count nil) (check-period nil))
1808  (declare (special dc b))
1809  (let ((check-count (or check-period most-positive-fixnum)))
1810    (dotimes (i (or count (length *symbols*)))
1811      (when (null *symbols*) (return))
1812      (incf *value-offset*)
1813      (dc-btree-delete dc b (symbol-name (pop *symbols*)))
1814      (when (<= (decf check-count) 0)
1815        (setq check-count check-period)
1816        (format t "~&Checking ~d..." i)
1817        (check-symbols)
1818        (terpri)))))
1819
1820(defun sort-syms-upto (sym)
1821  (let ((first-n (let ((res nil))
1822                   (dolist (s *symbols* (error "Not found"))
1823                     (push s res)
1824                     (when (eq s sym) (return res))))))
1825    (sort first-n #'string<)))
1826
1827(defun btree-test (&optional (step-sym 0))
1828  (init-temp-btree)
1829  (store-symbols step-sym))
1830
1831(defun clear-disk-cache ()
1832  (unwind-protect
1833    (with-open-file (s "temp.temp" :direction :output :if-exists :overwrite)
1834      (file-length s (* 256 1024))
1835      (dotimes (i 512)
1836        (file-position s (* i 512))
1837        (tyo #\f s)))
1838    (delete-file "temp.temp")))
1839
1840(defun time-btree-store (&optional (swap-space-in-k 20))
1841  (let* ((syms (*symbols*))
1842         (syms-count (length syms))
1843         (index 0))
1844    (declare (fixnum index))
1845    (clear-disk-cache)
1846    (gc)
1847    (let ((time (get-internal-real-time)))
1848      (with-open-pheap (p "temp.pheap"
1849                          :if-exists :supersede
1850                          :if-does-not-exist :create
1851                          :swapping-space (* swap-space-in-k 1024)
1852                          :page-size 512)
1853        (let ((b (p-make-btree p)))
1854          (setf (root-object p) b)
1855          (dolist (s syms)
1856            (setf (p-btree-lookup b (symbol-name s)) (incf index))))
1857        (let ((total-time (/ (float (- (get-internal-real-time) time))
1858                             internal-time-units-per-second))
1859              (file-length (with-open-file (s "temp.pheap") (file-length s))))
1860          (format t "~&Total time: ~d~%Elements: ~d~%time/element: ~d~%file length: ~d"
1861                  total-time
1862                  syms-count
1863                  (/ total-time syms-count)
1864                  file-length))))))
1865
1866(defun time-btree-read (&optional (swap-space-in-k 20))
1867  (let* ((syms (*symbols*))
1868         (syms-count (length syms)))
1869    (clear-disk-cache)
1870    (gc)
1871    (let ((time (get-internal-real-time)))
1872      (with-open-pheap (p "temp.pheap"
1873                          :swapping-space (* swap-space-in-k 1024))
1874        (let ((b (root-object p)))
1875          (dolist (s syms)
1876            (p-btree-lookup b (symbol-name s)))))
1877      (let ((total-time (/ (float (- (get-internal-real-time) time))
1878                           internal-time-units-per-second))
1879            (file-length (with-open-file (s "temp.pheap") (file-length s))))
1880        (format t "~&Total time: ~d~%Elements: ~d~%time/element: ~d~%file length: ~d"
1881                total-time
1882                syms-count
1883                (/ total-time syms-count)
1884                file-length)))))
1885
1886
1887|#
1888
1889#|
1890; Code to trace functions that were hard to debug.
1891(advise %btree-insert-in-node
1892        (destructuring-bind (dc b node offset key-string value &optional value-imm? (key-length (length key-string))) arglist
1893          (declare (ignore offset value value-imm?))
1894          (if (or (%btree-leaf-node-p dc node)
1895                  (<= (normalize-size (+ 5 key-length))
1896                      (accessing-disk-cache (dc node) (load.uw $btree_free))))
1897            (:do-it)
1898            (step (:do-it))))
1899        :when :around)
1900
1901(advise %balance-inner-node-after-deletion
1902        (step (:do-it))
1903        :when :around)
1904|#
1905;;;    1   3/10/94  bill         1.8d247
1906;;;    2   7/26/94  Derek        1.9d027
1907;;;    3  10/04/94  bill         1.9d071
1908;;;    4  11/01/94  Derek        1.9d085 Bill's Saving Library Task
1909;;;    5  11/03/94  Moon         1.9d086
1910;;;    2   2/18/95  RŽti         1.10d019
1911;;;    3   3/23/95  bill         1.11d010
1912;;;    4   4/19/95  bill         1.11d021
1913;;;    5   6/02/95  bill         1.11d040
Note: See TracBrowser for help on using the repository browser.