2 * linux/fs/hfs/binsert.c
4 * Copyright (C) 1995-1997 Paul H. Hargrove
5 * This file may be distributed under the terms of the GNU General Public License.
7 * This file contains the code to insert records in a B-tree.
9 * "XXX" in a comment is a note to myself to consider changing something.
11 * In function preconditions the term "valid" applied to a pointer to
12 * a structure means that the pointer is non-NULL and the structure it
13 * points to has all fields initialized to consistent values.
16 #include "hfs_btree.h"
18 /*================ File-local functions ================*/
20 /* btree locking functions */
21 static inline void hfs_btree_lock(struct hfs_btree *tree)
24 hfs_sleep_on(&tree->wait);
28 static inline void hfs_btree_unlock(struct hfs_btree *tree)
31 hfs_wake_up(&tree->wait);
38 * Inserts a record in a given bnode known to have sufficient space.
40 * struct hfs_brec* brec: pointer to the brec for the insertion
41 * struct hfs_belem* belem: the element in the search path to insert in
42 * struct hfs_bkey* key: pointer to the key for the record to insert
43 * void* data: pointer to the record to insert
44 * hfs_u16 keysize: size of the key to insert
45 * hfs_u16 datasize: size of the record to insert
51 * 'brec' points to a valid (struct hfs_brec).
52 * 'belem' points to a valid (struct hfs_belem) in 'brec', the node
53 * of which has enough free space to insert 'key' and 'data'.
54 * 'key' is a pointer to a valid (struct hfs_bkey) of length 'keysize'
55 * which, in sorted order, belongs at the location indicated by 'brec'.
56 * 'data' is non-NULL an points to appropriate data of length 'datasize'
58 * The record has been inserted in the position indicated by 'brec'.
60 static void binsert_nonfull(struct hfs_brec *brec, struct hfs_belem *belem,
61 const struct hfs_bkey *key, const void *data,
62 hfs_u8 keysize, hfs_u16 datasize)
64 int i, rec, nrecs, size, tomove;
66 struct hfs_bnode *bnode = belem->bnr.bn;
68 rec = ++(belem->record);
69 size = ROUND(keysize+1) + datasize;
70 nrecs = bnode->ndNRecs + 1;
71 tomove = bnode_offset(bnode, nrecs) - bnode_offset(bnode, rec);
73 /* adjust the record table */
74 for (i = nrecs; i >= rec; --i) {
75 hfs_put_hs(bnode_offset(bnode,i) + size, RECTBL(bnode,i+1));
79 start = bnode_key(bnode, rec);
80 memmove(start + size, start, tomove);
82 /* copy in the key and the data*/
84 keysize = ROUND(keysize+1);
85 memcpy(start + 1, (hfs_u8 *)key + 1, keysize-1);
86 memcpy(start + keysize, data, datasize);
88 /* update record count */
96 * Adds a new root to a B*-tree, increasing its height.
98 * struct hfs_btree *tree: the tree to add a new root to
99 * struct hfs_bnode *left: the new root's first child or NULL
100 * struct hfs_bnode *right: the new root's second child or NULL
101 * Output Variable(s):
106 * 'tree' points to a valid (struct hfs_btree).
107 * 'left' and 'right' point to valid (struct hfs_bnode)s, which
108 * resulted from splitting the old root node, or are both NULL
109 * if there was no root node before.
111 * Upon success a new root node is added to 'tree' with either
112 * two children ('left' and 'right') or none.
114 static void add_root(struct hfs_btree *tree,
115 struct hfs_bnode *left,
116 struct hfs_bnode *right)
118 struct hfs_bnode_ref bnr;
119 struct hfs_bnode *root;
120 struct hfs_bkey *key;
121 int keylen = tree->bthKeyLen;
123 if (left && !right) {
124 hfs_warn("add_root: LEFT but no RIGHT\n");
128 bnr = hfs_bnode_alloc(tree);
129 if (!(root = bnr.bn)) {
133 root->sticky = HFS_STICKY;
135 tree->bthRoot = root->node;
138 root->ndNHeight = tree->bthDepth;
144 root->ndType = ndLeafNode;
147 tree->bthFNode = root->node;
148 tree->bthLNode = root->node;
150 root->ndType = ndIndxNode;
153 hfs_put_hs(sizeof(struct NodeDescriptor) + ROUND(1+keylen) +
154 sizeof(hfs_u32), RECTBL(root, 2));
155 key = bnode_key(root, 1);
156 key->KeyLen = keylen;
158 ((struct hfs_bkey *)bnode_key(left, 1))->value, keylen);
159 hfs_put_hl(left->node, bkey_record(key));
161 hfs_put_hs(sizeof(struct NodeDescriptor) + 2*ROUND(1+keylen) +
162 2*sizeof(hfs_u32), RECTBL(root, 3));
163 key = bnode_key(root, 2);
164 key->KeyLen = keylen;
166 ((struct hfs_bkey *)bnode_key(right, 1))->value, keylen);
167 hfs_put_hl(right->node, bkey_record(key));
169 /* the former root (left) is now just a normal node */
170 left->sticky = HFS_NOT_STICKY;
171 if ((left->next = bhash(tree, left->node))) {
172 left->next->prev = left;
174 bhash(tree, left->node) = left;
176 hfs_bnode_relse(&bnr);
181 * insert_empty_bnode()
184 * Adds an empty node to the right of 'left'.
186 * struct hfs_btree *tree: the tree to add a node to
187 * struct hfs_bnode *left: the node to add a node after
188 * Output Variable(s):
191 * struct hfs_bnode_ref *: reference to the new bnode.
193 * 'tree' points to a valid (struct hfs_btree) with at least 1 free node.
194 * 'left' points to a valid (struct hfs_bnode) belonging to 'tree'.
196 * If NULL is returned then 'tree' and 'left' are unchanged.
197 * Otherwise a node with 0 records is inserted in the tree to the right
198 * of the node 'left'. The 'ndFLink' of 'left' and the 'ndBLink' of
199 * the former right-neighbor of 'left' (if one existed) point to the
200 * new node. If 'left' had no right neighbor and is a leaf node the
201 * the 'bthLNode' of 'tree' points to the new node. The free-count and
202 * bitmap for 'tree' are kept current by hfs_bnode_alloc() which supplies
205 static struct hfs_bnode_ref insert_empty_bnode(struct hfs_btree *tree,
206 struct hfs_bnode *left)
208 struct hfs_bnode_ref retval;
209 struct hfs_bnode_ref right;
211 retval = hfs_bnode_alloc(tree);
213 hfs_warn("hfs_binsert: out of bnodes?.\n");
216 retval.bn->sticky = HFS_NOT_STICKY;
217 if ((retval.bn->next = bhash(tree, retval.bn->node))) {
218 retval.bn->next->prev = retval.bn;
220 bhash(tree, retval.bn->node) = retval.bn;
223 right = hfs_bnode_find(tree, left->ndFLink, HFS_LOCK_WRITE);
225 hfs_warn("hfs_binsert: corrupt btree.\n");
226 hfs_bnode_bitop(tree, retval.bn->node, 0);
227 hfs_bnode_relse(&retval);
230 right.bn->ndBLink = retval.bn->node;
231 hfs_bnode_relse(&right);
232 } else if (left->ndType == ndLeafNode) {
233 tree->bthLNode = retval.bn->node;
237 retval.bn->ndFLink = left->ndFLink;
238 retval.bn->ndBLink = left->node;
239 retval.bn->ndType = left->ndType;
240 retval.bn->ndNHeight = left->ndNHeight;
241 retval.bn->ndNRecs = 0;
243 left->ndFLink = retval.bn->node;
253 * Splits an over full node during insertion.
254 * Picks the split point that results in the most-nearly equal
255 * space usage in the new and old nodes.
257 * struct hfs_belem *elem: the over full node.
258 * int size: the number of bytes to be used by the new record and its key.
259 * Output Variable(s):
260 * struct hfs_belem *elem: changed to indicate where the new record
261 * should be inserted.
263 * struct hfs_bnode_ref: reference to the new bnode.
265 * 'elem' points to a valid path element corresponding to the over full node.
266 * 'size' is positive.
268 * The records in the node corresponding to 'elem' are redistributed across
269 * the old and new nodes so that after inserting the new record, the space
270 * usage in these two nodes is as equal as possible.
271 * 'elem' is updated so that a call to binsert_nonfull() will insert the
272 * new record in the correct location.
274 static inline struct hfs_bnode_ref split(struct hfs_belem *elem, int size)
276 struct hfs_bnode *bnode = elem->bnr.bn;
277 int nrecs, cutoff, index, tmp, used, in_right;
278 struct hfs_bnode_ref right;
280 right = insert_empty_bnode(bnode->tree, bnode);
282 nrecs = bnode->ndNRecs;
283 cutoff = (size + bnode_end(bnode) -
284 sizeof(struct NodeDescriptor) +
285 (nrecs+1)*sizeof(hfs_u16))/2;
288 /* note that this only works because records sizes are even */
289 for (index=1; index <= elem->record; ++index) {
290 tmp = (sizeof(hfs_u16) + bnode_rsize(bnode, index))/2;
297 tmp = (size + sizeof(hfs_u16))/2;
304 for (; index <= nrecs; ++index) {
305 tmp = (sizeof(hfs_u16) + bnode_rsize(bnode, index))/2;
312 /* couldn't find the split point! */
313 hfs_bnode_relse(&right);
320 elem->record -= index-1;
322 hfs_bnode_shift_right(bnode, right.bn, index);
331 * Inserts a record in a tree known to have enough room, even if the
332 * insertion requires the splitting of nodes.
334 * struct hfs_brec *brec: partial path to the node to insert in
335 * const struct hfs_bkey *key: key for the new record
336 * const void *data: data for the new record
337 * hfs_u8 keysize: size of the key
338 * hfs_u16 datasize: size of the data
339 * int reserve: number of nodes reserved in case of splits
340 * Output Variable(s):
343 * int: 0 on success, error code on failure
345 * 'brec' points to a valid (struct hfs_brec) corresponding to a
346 * record in a leaf node, after which a record is to be inserted,
347 * or to "record 0" of the leaf node if the record is to be inserted
348 * before all existing records in the node. The (struct hfs_brec)
349 * includes all ancestors of the leaf node that are needed to
350 * complete the insertion including the parents of any nodes that
352 * 'key' points to a valid (struct hfs_bkey) which is appropriate
353 * to this tree, and which belongs at the insertion point.
354 * 'data' points data appropriate for the indicated node.
355 * 'keysize' gives the size in bytes of the key.
356 * 'datasize' gives the size in bytes of the data.
357 * 'reserve' gives the number of nodes that have been reserved in the
358 * tree to allow for splitting of nodes.
360 * All 'reserve'd nodes have been either used or released.
362 * On success the key and data have been inserted at the indicated
363 * location in the tree, all appropriate fields of the in-core data
364 * structures have been changed and updated versions of the on-disk
365 * data structures have been scheduled for write-back to disk.
366 * On failure the B*-tree is probably invalid both on disk and in-core.
368 * XXX: Some attempt at repair might be made in the event of failure,
369 * or the fs should be remounted read-only so things don't get worse.
371 static int binsert(struct hfs_brec *brec, const struct hfs_bkey *key,
372 const void *data, hfs_u8 keysize, hfs_u16 datasize,
375 struct hfs_bnode_ref left, right, other;
376 struct hfs_btree *tree = brec->tree;
377 struct hfs_belem *belem = brec->bottom;
378 int tmpsize = 1 + tree->bthKeyLen;
379 struct hfs_bkey *tmpkey = hfs_malloc(tmpsize);
382 while ((belem >= brec->top) && (belem->flags & HFS_BPATH_OVERFLOW)) {
384 if (left.bn->ndFLink &&
385 hfs_bnode_in_brec(left.bn->ndFLink, brec)) {
386 hfs_warn("hfs_binsert: corrupt btree\n");
387 tree->reserved -= reserve;
388 hfs_free(tmpkey, tmpsize);
392 right = split(belem, ROUND(keysize+1) + ROUND(datasize));
396 hfs_warn("hfs_binsert: unable to split node!\n");
397 tree->reserved -= reserve;
398 hfs_free(tmpkey, tmpsize);
401 binsert_nonfull(brec, belem, key, data, keysize, datasize);
403 if (belem->bnr.bn == left.bn) {
405 if (belem->record == 1) {
406 hfs_bnode_update_key(brec, belem, left.bn, 0);
412 if (left.bn->node == tree->root->node) {
413 add_root(tree, left.bn, right.bn);
414 hfs_bnode_relse(&other);
419 datasize = sizeof(node);
420 node = htonl(right.bn->node);
422 keysize = tree->bthKeyLen;
423 memcpy(tmpkey, bnode_key(right.bn, 1), keysize+1);
424 hfs_bnode_relse(&other);
429 if (belem < brec->top) {
430 hfs_warn("hfs_binsert: Missing parent.\n");
431 tree->reserved -= reserve;
432 hfs_free(tmpkey, tmpsize);
436 binsert_nonfull(brec, belem, key, data, keysize, datasize);
439 tree->reserved -= reserve;
440 hfs_free(tmpkey, tmpsize);
444 /*================ Global functions ================*/
450 * This function inserts a new record into a b-tree.
452 * struct hfs_btree *tree: pointer to the (struct hfs_btree) to insert in
453 * struct hfs_bkey *key: pointer to the (struct hfs_bkey) to insert
454 * void *data: pointer to the data to associate with 'key' in the b-tree
455 * unsigned int datasize: the size of the data
456 * Output Variable(s):
459 * int: 0 on success, error code on failure
461 * 'tree' points to a valid (struct hfs_btree)
462 * 'key' points to a valid (struct hfs_bkey)
463 * 'data' points to valid memory of length 'datasize'
465 * If zero is returned then the record has been inserted in the
466 * indicated location updating all in-core data structures and
467 * scheduling all on-disk data structures for write-back.
469 int hfs_binsert(struct hfs_btree *tree, const struct hfs_bkey *key,
470 const void *data, hfs_u16 datasize)
472 struct hfs_brec brec;
473 struct hfs_belem *belem;
474 int err, reserve, retval;
477 if (!tree || (tree->magic != HFS_BTREE_MAGIC) || !key || !data) {
478 hfs_warn("hfs_binsert: invalid arguments.\n");
482 if (key->KeyLen > tree->bthKeyLen) {
483 hfs_warn("hfs_binsert: oversized key\n");
488 if (!tree->bthNRecs) {
489 /* create the root bnode */
490 add_root(tree, NULL, NULL);
491 if (!hfs_brec_init(&brec, tree, HFS_BFIND_INSERT)) {
492 hfs_warn("hfs_binsert: failed to create root.\n");
496 err = hfs_bfind(&brec, tree, key, HFS_BFIND_INSERT);
498 hfs_warn("hfs_binsert: hfs_brec_find failed.\n");
500 } else if (err == 0) {
501 hfs_brec_relse(&brec, NULL);
506 keysize = key->KeyLen;
507 datasize = ROUND(datasize);
510 if (bnode_freespace(belem->bnr.bn) <
511 (sizeof(hfs_u16) + ROUND(keysize+1) + datasize)) {
512 belem->flags |= HFS_BPATH_OVERFLOW;
514 if (belem->record == 0) {
515 belem->flags |= HFS_BPATH_FIRST;
519 hfs_brec_lock(&brec, brec.bottom);
522 reserve = brec.bottom - brec.top;
526 /* make certain we have enough nodes to proceed */
527 if ((tree->bthFree - tree->reserved) < reserve) {
528 hfs_brec_relse(&brec, NULL);
529 hfs_btree_lock(tree);
530 if ((tree->bthFree - tree->reserved) < reserve) {
531 hfs_btree_extend(tree);
533 hfs_btree_unlock(tree);
534 if ((tree->bthFree - tree->reserved) < reserve) {
540 tree->reserved += reserve;
541 hfs_brec_lock(&brec, NULL);
544 retval = binsert(&brec, key, data, keysize, datasize, reserve);
545 hfs_brec_relse(&brec, NULL);