/* Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README */
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
13 ** balance_leaf_when_delete
19 #include <linux/config.h>
20 #include <asm/uaccess.h>
21 #include <linux/sched.h>
22 #include <linux/reiserfs_fs.h>
#ifdef CONFIG_REISERFS_CHECK

/* Debug-only global: points at the tree_balance currently being
 * processed.  Detects whether more than one copy of tb exists, as a
 * means of checking whether schedule() is interrupting do_balance. */
struct tree_balance * cur_tb = NULL;
33 inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
34 struct buffer_head * bh, int flag)
36 if (reiserfs_dont_log(tb->tb_sb)) {
37 if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
38 __mark_buffer_dirty(bh) ;
39 tb->need_balance_dirty = 1;
42 int windex = push_journal_writer("do_balance") ;
43 journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ;
44 pop_journal_writer(windex) ;
/* Internal-node and super-block buffers are marked dirty exactly the
 * same way as leaf buffers; these aliases only document intent at the
 * call sites. */
#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
53 if deleting something ( tb->insert_size[0] < 0 )
54 return(balance_leaf_when_delete()); (flag d handled here)
56 if lnum is larger than 0 we put items into the left node
57 if rnum is larger than 0 we put items into the right node
58 if snum1 is larger than 0 we put items into the new node s1
59 if snum2 is larger than 0 we put items into the new node s2
60 Note that all *num* count new items being created.
62 It would be easier to read balance_leaf() if each of these summary
63 lines was a separate procedure rather than being inlined. I think
64 that there are many passages here and in balance_leaf_when_delete() in
65 which two calls to one procedure can replace two passages, and it
66 might save cache space and improve software maintenance costs to do so.
68 Vladimir made the perceptive comment that we should offload most of
69 the decision making in this function into fix_nodes/check_balance, and
70 then create some sort of structure in tb that says what actions should
71 be performed by do_balance.
/* Balance leaf node in case of delete or cut: insert_size[0] < 0
 *
 * lnum, rnum can have values >= -1
 *	-1 means that the neighbor must be joined with S
 *	 0 means that nothing should be done with the neighbor
 *	>0 means to shift entirely or partly the specified number of
 *	   items to the neighbor
 */
static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
	/* NOTE(review): several structural lines (braces, the switch(flag)
	 * opener, break statements) are missing from this copy of the
	 * source — restore them from the reference tree before compiling. */
	struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
	int item_pos = PATH_LAST_POSITION (tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	struct item_head * ih;

	/* sanity: FR[0] (if any) must sit one level above the leaves, at
	 * most one node may be involved, and the tree must not be empty */
	RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
		"vs- 12000: level: wrong FR %z\n", tb->FR[0]);
	RFALSE( tb->blknum[0] > 1,
		"PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
		"PAP-12010: tree can not be empty");

	ih = B_N_PITEM_HEAD (tbS0, item_pos);

	/* Delete or truncate the item */

	case M_DELETE:	/* delete item in S[0] */

		/* a whole-item delete must account for the item body plus
		 * its header */
		RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
			"vs-12013: mode Delete, insert size %d, ih to be deleted %h",
			-tb->insert_size [0], ih);

		bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
		bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
		leaf_delete_items (&bi, 0, item_pos, 1, -1);

		/* the 0-th item was deleted: the left delimiting key must
		 * be replaced with the new first key of S[0], or, when S[0]
		 * emptied, taken from its parent */
		if ( ! item_pos && tb->CFL[0] ) {
			if ( B_NR_ITEMS(tbS0) ) {
				replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
			if ( ! PATH_H_POSITION (tb->tb_path, 1) )
				replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);

		RFALSE( ! item_pos && !tb->CFL[0],
			"PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);

	case M_CUT: {	/* cut item in S[0] */
		bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
		bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
		if (is_direntry_le_ih (ih)) {
			/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
			/* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
			tb->insert_size[0] = -1;
			leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);

			RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
				"PAP-12030: can not change delimiting key. CFL[0]=%p",

			/* entry 0 of the first item was removed: propagate
			 * the new first key to the left delimiting key */
			if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
				replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);

			/* non-directory: cut the tail bytes of the item */
			leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);

			RFALSE( ! ih_item_len(ih),
				"PAP-12035: cut must leave non-zero dynamic length of item");

		/* default: any mode other than delete/cut is a bug here */
		print_cur_tb ("12040");
		reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
				(flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);

	/* the rule is that no shifting occurs unless by shifting a node can be freed */
	n = B_NR_ITEMS(tbS0);
	if ( tb->lnum[0] )	/* L[0] takes part in balancing */
		if ( tb->lnum[0] == -1 )	/* L[0] must be joined with S[0] */
			if ( tb->rnum[0] == -1 )	/* R[0] must be also joined with S[0] */
				if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
					/* all contents of all the 3 buffers will be in L[0] */
					if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
						replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);

					leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, 0);
					leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, 0);

					/* S[0] and R[0] are now empty */
					reiserfs_invalidate_buffer (tb, tbS0);
					reiserfs_invalidate_buffer (tb, tb->R[0]);

				/* all contents of all the 3 buffers will be in R[0] */
				leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, 0);
				leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, 0);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);

				reiserfs_invalidate_buffer (tb, tbS0);
				reiserfs_invalidate_buffer (tb, tb->L[0]);

			RFALSE( tb->rnum[0] != 0,
				"PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer (tb, tbS0);

		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */

		/* consistency: the items of S[0] must split exactly between
		 * the neighbors (n items, or n+1 when one item is cut) */
		RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
			( tb->lnum[0] + tb->rnum[0] > n+1 ),
			"PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
			tb->rnum[0], tb->lnum[0], n);
		RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
			(tb->lbytes != -1 || tb->rbytes != -1),
			"PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
			tb->rbytes, tb->lbytes);
		RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
			(tb->lbytes < 1 || tb->rbytes != -1),
			"PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
			tb->rbytes, tb->lbytes);

		leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer (tb, tbS0);

	if ( tb->rnum[0] == -1 ) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer (tb, tbS0);

		"PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
246 static int balance_leaf (struct tree_balance * tb,
247 struct item_head * ih, /* item header of inserted item (this is on little endian) */
248 const char * body, /* body of inserted item or bytes to paste */
249 int flag, /* i - insert, d - delete, c - cut, p - paste
250 (see comment to do_balance) */
251 struct item_head * insert_key, /* in our processing of one level we sometimes determine what
252 must be inserted into the next higher level. This insertion
253 consists of a key or two keys and their corresponding
255 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
258 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
259 int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0]
260 of the affected item */
261 struct buffer_info bi;
262 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
263 int snum[2]; /* number of items that will be placed
264 into S_new (includes partially shifted
266 int sbytes[2]; /* if an item is partially shifted into S_new then
267 if it is a directory item
268 it is the number of entries from the item that are shifted into S_new
270 it is the number of bytes from the item that are shifted into S_new
277 PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] );
279 /* Make balance in case insert_size[0] < 0 */
280 if ( tb->insert_size[0] < 0 )
281 return balance_leaf_when_delete (tb, flag);
284 if (flag == M_INSERT && body == 0)
285 zeros_num = ih_item_len( ih );
287 pos_in_item = tb->tb_path->pos_in_item;
288 /* for indirect item pos_in_item is measured in unformatted node
289 pointers. Recalculate to bytes */
290 if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
291 pos_in_item *= UNFM_P_SIZE;
293 if ( tb->lnum[0] > 0 ) {
294 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
295 if ( item_pos < tb->lnum[0] ) {
296 /* new item or it part falls to L[0], shift it too */
297 n = B_NR_ITEMS(tb->L[0]);
300 case M_INSERT: /* insert item into L[0] */
302 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
303 /* part of new item falls into L[0] */
307 ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
309 /* Calculate item length to insert to S[0] */
310 new_item_len = ih_item_len(ih) - tb->lbytes;
311 /* Calculate and check item length to insert to L[0] */
312 put_ih_item_len(ih, ih_item_len(ih) - new_item_len );
314 RFALSE( ih_item_len(ih) <= 0,
315 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
318 /* Insert new item into L[0] */
321 bi.bi_parent = tb->FL[0];
322 bi.bi_position = get_left_neighbor_position (tb, 0);
323 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
324 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
326 version = ih_version (ih);
328 /* Calculate key component, item length and body to insert into S[0] */
329 set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0)) );
331 put_ih_item_len( ih, new_item_len );
332 if ( tb->lbytes > zeros_num ) {
333 body += (tb->lbytes - zeros_num);
337 zeros_num -= tb->lbytes;
339 RFALSE( ih_item_len(ih) <= 0,
340 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
343 /* new item in whole falls into L[0] */
344 /* Shift lnum[0]-1 items to L[0] */
345 ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
346 /* Insert new item into L[0] */
349 bi.bi_parent = tb->FL[0];
350 bi.bi_position = get_left_neighbor_position (tb, 0);
351 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
352 tb->insert_size[0] = 0;
357 case M_PASTE: /* append item in L[0] */
359 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
360 /* we must shift the part of the appended item */
361 if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {
364 "PAP-12090: illegal parameter in case of a directory");
366 if ( tb->lbytes > pos_in_item ) {
367 /* new directory entry falls into L[0] */
368 struct item_head * pasted;
369 int l_pos_in_item = pos_in_item;
371 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
372 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
373 if ( ret_val && ! item_pos ) {
374 pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
375 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
378 /* Append given directory entry to directory item */
381 bi.bi_parent = tb->FL[0];
382 bi.bi_position = get_left_neighbor_position (tb, 0);
383 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
384 tb->insert_size[0], body, zeros_num);
386 /* previous string prepared space for pasting new entry, following string pastes this entry */
388 /* when we have merge directory item, pos_in_item has been changed too */
390 /* paste new directory entry. 1 is entry number */
391 leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
392 (struct reiserfs_de_head *)body,
393 body + DEH_SIZE, tb->insert_size[0]
395 tb->insert_size[0] = 0;
397 /* new directory item doesn't fall into L[0] */
398 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
399 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
401 /* Calculate new position to append in item body */
402 pos_in_item -= tb->lbytes;
406 RFALSE( tb->lbytes <= 0,
407 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
409 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
410 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
411 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
413 if ( tb->lbytes >= pos_in_item ) {
414 /* appended item will be in L[0] in whole */
417 /* this bytes number must be appended to the last item of L[h] */
418 l_n = tb->lbytes - pos_in_item;
420 /* Calculate new insert_size[0] */
421 tb->insert_size[0] -= l_n;
423 RFALSE( tb->insert_size[0] <= 0,
424 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
426 ret_val = leaf_shift_left(tb,tb->lnum[0],
427 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
428 /* Append to body of item in L[0] */
431 bi.bi_parent = tb->FL[0];
432 bi.bi_position = get_left_neighbor_position (tb, 0);
433 leaf_paste_in_buffer(
434 &bi,n + item_pos - ret_val,
435 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
436 l_n,body, zeros_num > l_n ? l_n : zeros_num
438 /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
443 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
444 "PAP-12106: item length must be 0");
445 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
447 n + item_pos - ret_val)),
448 "PAP-12107: items must be of the same file");
449 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
450 n + item_pos - ret_val))) {
451 temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
453 /* update key of first item in S0 */
454 version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
455 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
456 le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
457 /* update left delimiting key */
458 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
459 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
462 /* Calculate new body, position in item and insert_size[0] */
463 if ( l_n > zeros_num ) {
464 body += (l_n - zeros_num);
471 RFALSE( comp_short_le_keys
473 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
475 !op_is_left_mergeable
476 (B_N_PKEY (tbS0, 0), tbS0->b_size) ||
477 !op_is_left_mergeable
478 (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
480 "PAP-12120: item must be merge-able with left neighboring item");
482 else /* only part of the appended item will be in L[0] */
484 /* Calculate position in item for append in S[0] */
485 pos_in_item -= tb->lbytes;
487 RFALSE( pos_in_item <= 0,
488 "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
490 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
491 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
495 else /* appended item will be in L[0] in whole */
497 struct item_head * pasted;
499 if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) )
500 { /* if we paste into first item of S[0] and it is left mergable */
501 /* then increment pos_in_item by the size of the last item in L[0] */
502 pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
503 if ( is_direntry_le_ih (pasted) )
504 pos_in_item += ih_entry_count(pasted);
506 pos_in_item += ih_item_len(pasted);
509 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
510 ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
511 /* Append to body of item in L[0] */
514 bi.bi_parent = tb->FL[0];
515 bi.bi_position = get_left_neighbor_position (tb, 0);
516 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
519 /* if appended item is directory, paste entry */
520 pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
521 if (is_direntry_le_ih (pasted))
523 bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
524 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
526 /* if appended item is indirect item, put unformatted node into un list */
527 if (is_indirect_le_ih (pasted))
528 set_ih_free_space (pasted, 0);
529 tb->insert_size[0] = 0;
533 default: /* cases d and t */
534 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
535 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
538 /* new item doesn't fall into L[0] */
539 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
541 } /* tb->lnum[0] > 0 */
543 /* Calculate new item position */
544 item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));
546 if ( tb->rnum[0] > 0 ) {
547 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
548 n = B_NR_ITEMS(tbS0);
551 case M_INSERT: /* insert item */
552 if ( n - tb->rnum[0] < item_pos )
553 { /* new item or its part falls to R[0] */
554 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
555 { /* part of new item falls into R[0] */
556 loff_t old_key_comp, old_len, r_zeros_number;
561 leaf_shift_right(tb,tb->rnum[0]-1,-1);
563 version = ih_version(ih);
564 /* Remember key component and item length */
565 old_key_comp = le_ih_k_offset( ih );
566 old_len = ih_item_len(ih);
568 /* Calculate key component and item length to insert into R[0] */
569 offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0));
570 set_le_ih_k_offset( ih, offset );
571 put_ih_item_len( ih, tb->rbytes);
572 /* Insert part of the item into R[0] */
575 bi.bi_parent = tb->FR[0];
576 bi.bi_position = get_right_neighbor_position (tb, 0);
577 if ( (old_len - tb->rbytes) > zeros_num ) {
579 r_body = body + (old_len - tb->rbytes) - zeros_num;
583 r_zeros_number = zeros_num - (old_len - tb->rbytes);
584 zeros_num -= r_zeros_number;
587 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
589 /* Replace right delimiting key by first key in R[0] */
590 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
592 /* Calculate key component and item length to insert into S[0] */
593 set_le_ih_k_offset( ih, old_key_comp );
594 put_ih_item_len( ih, old_len - tb->rbytes );
596 tb->insert_size[0] -= tb->rbytes;
599 else /* whole new item falls into R[0] */
601 /* Shift rnum[0]-1 items to R[0] */
602 ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
603 /* Insert new item into R[0] */
606 bi.bi_parent = tb->FR[0];
607 bi.bi_position = get_right_neighbor_position (tb, 0);
608 leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
610 if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
611 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
614 zeros_num = tb->insert_size[0] = 0;
617 else /* new item or part of it doesn't fall into R[0] */
619 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
623 case M_PASTE: /* append item */
625 if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */
627 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
628 { /* we must shift the part of the appended item */
629 if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
630 { /* we append to directory item */
634 "PAP-12145: illegal parametr in case of a directory");
635 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
636 if ( entry_count - tb->rbytes < pos_in_item )
637 /* new directory entry falls into R[0] */
639 int paste_entry_position;
641 RFALSE( tb->rbytes - 1 >= entry_count ||
642 ! tb->insert_size[0],
643 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
644 tb->rbytes, entry_count);
645 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
646 leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
647 /* Paste given directory entry to directory item */
648 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
651 bi.bi_parent = tb->FR[0];
652 bi.bi_position = get_right_neighbor_position (tb, 0);
653 leaf_paste_in_buffer (&bi, 0, paste_entry_position,
654 tb->insert_size[0],body,zeros_num);
657 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
658 body + DEH_SIZE, tb->insert_size[0]
661 if ( paste_entry_position == 0 ) {
662 /* change delimiting keys */
663 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
666 tb->insert_size[0] = 0;
669 else /* new directory entry doesn't fall into R[0] */
671 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
674 else /* regular object */
676 int n_shift, n_rem, r_zeros_number;
679 /* Calculate number of bytes which must be shifted from appended item */
680 if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
683 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
684 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
685 pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
687 leaf_shift_right(tb,tb->rnum[0],n_shift);
688 /* Calculate number of bytes which must remain in body after appending to R[0] */
689 if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
694 unsigned long temp_rem = n_rem;
696 version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
697 if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
698 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
701 set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
702 le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
703 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
704 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
706 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
707 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
708 do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);
710 /* Append part of body into R[0] */
713 bi.bi_parent = tb->FR[0];
714 bi.bi_position = get_right_neighbor_position (tb, 0);
715 if ( n_rem > zeros_num ) {
717 r_body = body + n_rem - zeros_num;
721 r_zeros_number = zeros_num - n_rem;
722 zeros_num -= r_zeros_number;
725 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
727 if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
730 "PAP-12160: paste more than one unformatted node pointer");
732 set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
734 tb->insert_size[0] = n_rem;
739 else /* pasted item in whole falls into R[0] */
741 struct item_head * pasted;
743 ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
744 /* append item in R[0] */
745 if ( pos_in_item >= 0 ) {
748 bi.bi_parent = tb->FR[0];
749 bi.bi_position = get_right_neighbor_position (tb, 0);
750 leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
751 tb->insert_size[0],body, zeros_num);
754 /* paste new entry, if item is directory item */
755 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
756 if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
758 bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
759 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
761 if ( ! pos_in_item ) {
763 RFALSE( item_pos - n + tb->rnum[0],
764 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
766 /* update delimiting keys */
767 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
771 if (is_indirect_le_ih (pasted))
772 set_ih_free_space (pasted, 0);
773 zeros_num = tb->insert_size[0] = 0;
776 else /* new item doesn't fall into R[0] */
778 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
781 default: /* cases d and t */
782 reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
783 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
786 } /* tb->rnum[0] > 0 */
789 RFALSE( tb->blknum[0] > 3,
790 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
791 RFALSE( tb->blknum[0] < 0,
792 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
794 /* if while adding to a node we discover that it is possible to split
795 it in two, and merge the left part into the left neighbor and the
796 right part into the right neighbor, eliminating the node */
797 if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
799 RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
800 "PAP-12190: lnum and rnum must not be zero");
801 /* if insertion was done before 0-th position in R[0], right
802 delimiting key of the tb->L[0]'s and left delimiting key are
806 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
807 copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
808 do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
811 reiserfs_invalidate_buffer(tb,tbS0);
816 /* Fill new nodes that appear in place of S[0] */
818 /* I am told that this copying is because we need an array to enable
819 the looping code. -Hans */
822 sbytes[0] = tb->s1bytes;
823 sbytes[1] = tb->s2bytes;
824 for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
826 RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
828 /* here we shift from S to S_new nodes */
830 S_new[i] = get_FEB(tb);
832 /* initialized block type and tree level */
833 set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );
836 n = B_NR_ITEMS(tbS0);
839 case M_INSERT: /* insert item */
841 if ( n - snum[i] < item_pos )
842 { /* new item or it's part falls to first new node S_new[i]*/
843 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
844 { /* part of new item falls into S_new[i] */
845 int old_key_comp, old_len, r_zeros_number;
849 /* Move snum[i]-1 items from S[0] to S_new[i] */
850 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
851 /* Remember key component and item length */
852 version = ih_version (ih);
853 old_key_comp = le_ih_k_offset( ih );
854 old_len = ih_item_len(ih);
856 /* Calculate key component and item length to insert into S_new[i] */
857 set_le_ih_k_offset( ih,
858 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0) ));
860 put_ih_item_len( ih, sbytes[i] );
862 /* Insert part of the item into S_new[i] before 0-th item */
868 if ( (old_len - sbytes[i]) > zeros_num ) {
870 r_body = body + (old_len - sbytes[i]) - zeros_num;
874 r_zeros_number = zeros_num - (old_len - sbytes[i]);
875 zeros_num -= r_zeros_number;
878 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
880 /* Calculate key component and item length to insert into S[i] */
881 set_le_ih_k_offset( ih, old_key_comp );
882 put_ih_item_len( ih, old_len - sbytes[i] );
883 tb->insert_size[0] -= sbytes[i];
885 else /* whole new item falls into S_new[i] */
887 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
888 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
890 /* Insert new item into S_new[i] */
895 leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
897 zeros_num = tb->insert_size[0] = 0;
901 else /* new item or it part don't falls into S_new[i] */
903 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
907 case M_PASTE: /* append item */
909 if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */
911 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
912 { /* we must shift part of the appended item */
913 struct item_head * aux_ih;
915 RFALSE( ih, "PAP-12210: ih must be 0");
917 if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
918 /* we append to directory item */
922 entry_count = ih_entry_count(aux_ih);
924 if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
925 /* new directory entry falls into S_new[i] */
927 RFALSE( ! tb->insert_size[0],
928 "PAP-12215: insert_size is already 0");
929 RFALSE( sbytes[i] - 1 >= entry_count,
930 "PAP-12220: there are no so much entries (%d), only %d",
931 sbytes[i] - 1, entry_count);
933 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
934 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
935 /* Paste given directory entry to directory item */
940 leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
941 tb->insert_size[0], body,zeros_num);
942 /* paste new directory entry */
944 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
945 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
948 tb->insert_size[0] = 0;
950 } else { /* new directory entry doesn't fall into S_new[i] */
951 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
954 else /* regular object */
956 int n_shift, n_rem, r_zeros_number;
959 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
960 tb->insert_size[0] <= 0,
961 "PAP-12225: item too short or insert_size <= 0");
963 /* Calculate number of bytes which must be shifted from appended item */
964 n_shift = sbytes[i] - tb->insert_size[0];
967 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
969 /* Calculate number of bytes which must remain in body after append to S_new[i] */
970 n_rem = tb->insert_size[0] - sbytes[i];
973 /* Append part of body into S_new[0] */
979 if ( n_rem > zeros_num ) {
981 r_body = body + n_rem - zeros_num;
985 r_zeros_number = zeros_num - n_rem;
986 zeros_num -= r_zeros_number;
989 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number);
991 struct item_head * tmp;
993 tmp = B_N_PITEM_HEAD(S_new[i],0);
994 if (is_indirect_le_ih (tmp)) {
995 set_ih_free_space (tmp, 0);
996 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
997 (n_rem << (tb->tb_sb->s_blocksize_bits -
1000 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
1005 tb->insert_size[0] = n_rem;
1011 /* item falls wholly into S_new[i] */
1014 struct item_head * pasted;
1016 #ifdef CONFIG_REISERFS_CHECK
1017 struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);
1019 if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
1020 tb->insert_size[0] <= 0) )
1021 reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1022 #endif /* CONFIG_REISERFS_CHECK */
1024 ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1027 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1030 /* paste into item */
1032 bi.bi_bh = S_new[i];
1035 leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1037 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1038 if (is_direntry_le_ih (pasted))
1040 leaf_paste_entries (
1041 bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1042 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1046 /* if we paste to indirect item update ih_free_space */
1047 if (is_indirect_le_ih (pasted))
1048 set_ih_free_space (pasted, 0);
1049 zeros_num = tb->insert_size[0] = 0;
1053 else /* pasted item doesn't fall into S_new[i] */
1055 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1058 default: /* cases d and t */
1059 reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1060 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1063 memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
1064 insert_ptr[i] = S_new[i];
1066 RFALSE( (atomic_read (&(S_new[i]->b_count)) != 1) &&
1067 (atomic_read(&(S_new[i]->b_count)) != 2 ||
1068 !(buffer_journaled(S_new[i]) ||
1069 buffer_journal_dirty(S_new[i]))),
1070 "PAP-12247: S_new[%d] : (%b)\n", i, S_new[i]);
1075 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1076 affected item which remains in S */
1077 if ( 0 <= item_pos && item_pos < tb->s0num )
1078 { /* if we must insert or append into buffer S[0] */
1082 case M_INSERT: /* insert item into S[0] */
1085 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1086 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1087 leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1089 /* If we insert the first key change the delimiting key */
1090 if( item_pos == 0 ) {
1091 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1092 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1097 case M_PASTE: { /* append item in S[0] */
1098 struct item_head * pasted;
1100 pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1101 /* when directory, may be new entry already pasted */
1102 if (is_direntry_le_ih (pasted)) {
1103 if ( pos_in_item >= 0 &&
1104 pos_in_item <= ih_entry_count(pasted) ) {
1106 RFALSE( ! tb->insert_size[0],
1107 "PAP-12260: insert_size is 0 already");
1112 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1113 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1114 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1117 leaf_paste_entries (
1118 bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1119 body + DEH_SIZE, tb->insert_size[0]
1121 if ( ! item_pos && ! pos_in_item ) {
1122 RFALSE( !tb->CFL[0] || !tb->L[0],
1123 "PAP-12270: CFL[0]/L[0] must be specified");
1125 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1129 tb->insert_size[0] = 0;
1131 } else { /* regular object */
1132 if ( pos_in_item == ih_item_len(pasted) ) {
1134 RFALSE( tb->insert_size[0] <= 0,
1135 "PAP-12275: insert size must not be %d",
1136 tb->insert_size[0]);
1139 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1140 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1141 leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1143 if (is_indirect_le_ih (pasted)) {
1145 RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
1146 "PAP-12280: insert_size for indirect item must be %d, not %d",
1147 UNFM_P_SIZE, tb->insert_size[0]);
1149 set_ih_free_space (pasted, 0);
1151 tb->insert_size[0] = 0;
1154 #ifdef CONFIG_REISERFS_CHECK
1156 if ( tb->insert_size[0] ) {
1157 print_cur_tb ("12285");
1158 reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
1161 #endif /* CONFIG_REISERFS_CHECK */
1164 } /* case M_PASTE: */
1168 #ifdef CONFIG_REISERFS_CHECK
1169 if ( flag == M_PASTE && tb->insert_size[0] ) {
1170 print_cur_tb ("12290");
1171 reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
1173 #endif /* CONFIG_REISERFS_CHECK */
1176 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1180 /* Make empty node */
1181 void make_empty_node (struct buffer_info * bi)
1183 struct block_head * blkh;
1185 RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1187 blkh = B_BLK_HEAD(bi->bi_bh);
1188 set_blkh_nr_item( blkh, 0 );
1189 set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );
1192 B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1196 /* Get first empty buffer */
1197 struct buffer_head * get_FEB (struct tree_balance * tb)
1200 struct buffer_head * first_b;
1201 struct buffer_info bi;
1203 for (i = 0; i < MAX_FEB_SIZE; i ++)
1204 if (tb->FEB[i] != 0)
1207 if (i == MAX_FEB_SIZE)
1208 reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1211 bi.bi_bh = first_b = tb->FEB[i];
1214 make_empty_node (&bi);
1215 set_bit(BH_Uptodate, &first_b->b_state);
1217 tb->used[i] = first_b;
1223 /* This is now used because reiserfs_free_block has to be able to
1226 static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
1230 if (buffer_dirty (bh))
1231 reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer\n");
1232 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
1233 if (!tb->thrown[i]) {
1235 get_bh(bh) ; /* free_thrown puts this */
1238 reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers\n");
1241 static void free_thrown(struct tree_balance *tb) {
1243 unsigned long blocknr ;
1244 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
1245 if (tb->thrown[i]) {
1246 blocknr = tb->thrown[i]->b_blocknr ;
1247 if (buffer_dirty (tb->thrown[i]))
1248 reiserfs_warning (tb->tb_sb, "free_thrown deals with dirty buffer %ld\n", blocknr);
1249 brelse(tb->thrown[i]) ; /* incremented in store_thrown */
1250 reiserfs_free_block (tb->transaction_handle, blocknr);
1255 void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
1257 struct block_head *blkh;
1258 blkh = B_BLK_HEAD(bh);
1259 set_blkh_level( blkh, FREE_LEVEL );
1260 set_blkh_nr_item( blkh, 0 );
1262 mark_buffer_clean (bh);
1263 /* reiserfs_free_block is no longer schedule safe
1264 reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr);
1267 store_thrown (tb, bh);
1270 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1271 void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
1272 struct buffer_head * src, int n_src)
1275 RFALSE( dest == NULL || src == NULL,
1276 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1278 RFALSE( ! B_IS_KEYS_LEVEL (dest),
1279 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1281 RFALSE( n_dest < 0 || n_src < 0,
1282 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1283 RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1284 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1285 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1287 if (B_IS_ITEMS_LEVEL (src))
1288 /* source buffer contains leaf node */
1289 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
1291 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);
1293 do_balance_mark_internal_dirty (tb, dest, 0);
1297 int get_left_neighbor_position (
1298 struct tree_balance * tb,
1302 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1304 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
1305 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1306 h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));
1308 if (Sh_position == 0)
1309 return B_NR_ITEMS (tb->FL[h]);
1311 return Sh_position - 1;
1315 int get_right_neighbor_position (struct tree_balance * tb, int h)
1317 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1319 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
1320 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1321 h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);
1323 if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
1326 return Sh_position + 1;
1330 #ifdef CONFIG_REISERFS_CHECK
1332 int is_reusable (struct super_block * s, unsigned long block, int bit_value);
1333 static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
1335 struct disk_child * dc;
1338 RFALSE( !bh, "PAP-12336: bh == 0");
1340 if (!bh || !B_IS_IN_TREE (bh))
1343 RFALSE( !buffer_dirty (bh) &&
1344 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1345 "PAP-12337: buffer (%b) must be dirty", bh);
1346 dc = B_N_CHILD (bh, 0);
1348 for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1349 if (!is_reusable (s, dc_block_number(dc), 1) ) {
1351 reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1357 static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
1359 if ( buffer_locked (bh) || !B_IS_IN_TREE (bh) ) {
1360 reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)\n", which, bh);
1367 static int check_before_balancing (struct tree_balance * tb)
1372 reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
1373 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1374 "do_balance cannot properly handle schedule occuring while it runs.");
1377 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1378 prepped all of these for us). */
1379 if ( tb->lnum[0] ) {
1380 retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1381 retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1382 retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1383 check_leaf (tb->L[0]);
1385 if ( tb->rnum[0] ) {
1386 retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1387 retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1388 retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1389 check_leaf (tb->R[0]);
1391 retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1392 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
/* Post-balancing consistency check for the leaf level: the free space
   recorded in each modified node must agree with the dc_size its parent
   records for it.  Panics (after dumping state) on any mismatch. */
1398 void check_after_balance_leaf (struct tree_balance * tb)
/* left neighbor: free space must match FL[0]'s record of its size */
1401 if (B_FREE_SPACE (tb->L[0]) !=
1402 MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
1403 print_cur_tb ("12221");
1404 reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
/* right neighbor: free space must match FR[0]'s record of its size */
1408 if (B_FREE_SPACE (tb->R[0]) !=
1409 MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
1410 print_cur_tb ("12222");
1411 reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
/* S[0] itself: compare its free space against the dc_size stored in its
   parent (the level-1 path buffer), when a parent exists */
1414 if (PATH_H_PBUFFER(tb->tb_path,1) &&
1415 (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) !=
1416 (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1417 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1418 PATH_H_POSITION (tb->tb_path, 1)))) )) {
/* mismatch: report both sides of the comparison before panicking */
1419 int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
1420 int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1421 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1422 PATH_H_POSITION (tb->tb_path, 1))));
1423 print_cur_tb ("12223");
1424 reiserfs_warning( tb->tb_sb,
1425 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1426 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d\n",
1428 MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
1429 PATH_H_PBUFFER(tb->tb_path,1),
1430 PATH_H_POSITION (tb->tb_path, 1),
1431 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
1433 reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
1438 void check_leaf_level (struct tree_balance * tb)
1440 check_leaf (tb->L[0]);
1441 check_leaf (tb->R[0]);
1442 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1445 void check_internal_levels (struct tree_balance * tb)
1449 /* check all internal nodes */
1450 for (h = 1; tb->insert_size[h]; h ++) {
1451 check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
1453 check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
1455 check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
1467 /* Now we have all of the buffers that must be used in balancing of
1468 the tree. We rely on the assumption that schedule() will not occur
1469 while do_balance works. ( Only interrupt handlers are acceptable.)
1470 We balance the tree according to the analysis made before this,
1471 using buffers already obtained. For SMP support it will someday be
1472 necessary to add ordered locking of tb. */
1474 /* Some interesting rules of balancing:
1476 we delete a maximum of two nodes per level per balancing: we never
1477 delete R, when we delete two of three nodes L, S, R then we move
1480 we only delete L if we are deleting two nodes, if we delete only
1481 one node we delete S
1483 if we shift leaves then we shift as much as we can: this is a
1484 deliberate policy of extremism in node packing which results in
1485 higher average utilization after repeated random balance operations
1486 at the cost of more memory copies and more balancing as a result of
1487 small insertions to full nodes.
1489 if we shift internal nodes we try to evenly balance the node
1490 utilization, with consequent less balancing at the cost of lower
1493 one could argue that the policy for directories in leaves should be
1494 that of internal nodes, but we will wait until another day to
1495 evaluate this.... It would be nice to someday measure and prove
1496 these assumptions as to what is optimal....
/* Preflight for do_balance: assert (in debug builds) that every buffer
   we are about to modify is unlocked and still in the tree, and publish
   tb in cur_tb so an interrupting schedule() can be detected. */
1500 static inline void do_balance_starts (struct tree_balance *tb)
1502 /* use print_cur_tb() to see initial state of struct
1505 /* store_print_tb (tb); */
1507 /* do not delete, just comment it out */
1508 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1510 RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
1511 #ifdef CONFIG_REISERFS_CHECK
/* Epilogue for do_balance: run debug consistency checks, bump the
   per-superblock balance counter, release the fixed nodes, and free the
   buffers parked on tb->thrown during balancing. */
1517 static inline void do_balance_completed (struct tree_balance * tb)
1520 #ifdef CONFIG_REISERFS_CHECK
1521 check_leaf_level (tb);
1522 check_internal_levels (tb);
1526 /* reiserfs_free_block is no longer schedule safe. So, we need to
1527 ** put the buffers we want freed on the thrown list during do_balance,
1528 ** and then free them now
1531 tb->tb_sb->u.reiserfs_sb.s_do_balance ++;
1534 /* release all nodes held to perform the balancing */
/* Balance the tree after fix_nodes has pinned every needed buffer:
   apply the requested operation (insert/paste/cut/delete, per `flag`)
   at the leaf level via balance_leaf, then propagate any resulting key
   insertions up the internal levels via balance_internal. */
1544 void do_balance (struct tree_balance * tb, /* tree_balance structure */
1545 struct item_head * ih, /* item header of inserted item */
1546 const char * body, /* body of inserted item or bytes to paste */
1547 int flag) /* i - insert, d - delete
1550 Cut means delete part of an item
1551 (includes removing an entry from a
1554 Delete means delete whole item.
1556 Insert means add a new item into the
1559 Paste means to append to the end of an
1560 existing file or to insert a directory
1563 int child_pos, /* position of a child node in its parent */
1564 h; /* level of the tree being processed */
1565 struct item_head insert_key[2]; /* in our processing of one level
1566 we sometimes determine what
1567 must be inserted into the next
1568 higher level. This insertion
1569 consists of a key or two keys
1570 and their corresponding
1572 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1576 tb->need_balance_dirty = 0;
/* the tree must not have changed since fix_nodes captured this tb */
1578 if (FILESYSTEM_CHANGED_TB(tb)) {
1579 reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
1581 /* if we have no real work to do */
1582 if ( ! tb->insert_size[0] ) {
1583 reiserfs_warning (tb->tb_sb, "PAP-12350: do_balance: insert_size == 0, mode == %c\n ",
/* advance the fs generation; FILESYSTEM_CHANGED_TB compares against it */
1589 atomic_inc (&(fs_generation (tb->tb_sb)));
1590 do_balance_starts (tb);
1592 /* balance leaf returns 0 except if combining L R and S into
1593 one node. see balance_internal() for explanation of this
1595 child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
1596 balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);
1598 #ifdef CONFIG_REISERFS_CHECK
1599 check_after_balance_leaf (tb);
1602 /* Balance internal level of the tree. */
/* each level may hand a key (and node pointer) up to the next */
1603 for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
1604 child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);
1607 do_balance_completed (tb);