make oldconfig will rebuild these...
[linux-2.4.21-pre4.git] / fs / ext3 / balloc.c
1 /*
2  *  linux/fs/ext3/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include <linux/sched.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/locks.h>
21 #include <linux/quotaops.h>
22
23 /*
24  * balloc.c contains the blocks allocation and deallocation routines
25  */
26
/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext3_read_super).
 */
37
38
/*
 * in_range(b, first, len) is true when b lies in the inclusive range
 * [first, first + len - 1].  Arguments are evaluated more than once, so
 * they must be free of side effects.
 */
#define in_range(b, first, len) \
        ((first) <= (b) && (b) <= (first) + (len) - 1)
40
41 struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42                                              unsigned int block_group,
43                                              struct buffer_head ** bh)
44 {
45         unsigned long group_desc;
46         unsigned long desc;
47         struct ext3_group_desc * gdp;
48
49         if (block_group >= sb->u.ext3_sb.s_groups_count) {
50                 ext3_error (sb, "ext3_get_group_desc",
51                             "block_group >= groups_count - "
52                             "block_group = %d, groups_count = %lu",
53                             block_group, sb->u.ext3_sb.s_groups_count);
54
55                 return NULL;
56         }
57         
58         group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
59         desc = block_group % EXT3_DESC_PER_BLOCK(sb);
60         if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
61                 ext3_error (sb, "ext3_get_group_desc",
62                             "Group descriptor not loaded - "
63                             "block_group = %d, group_desc = %lu, desc = %lu",
64                              block_group, group_desc, desc);
65                 return NULL;
66         }
67         
68         gdp = (struct ext3_group_desc *) 
69               sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
70         if (bh)
71                 *bh = sb->u.ext3_sb.s_group_desc[group_desc];
72         return gdp + desc;
73 }
74
75 /*
76  * Read the bitmap for a given block_group, reading into the specified 
77  * slot in the superblock's bitmap cache.
78  *
79  * Return >=0 on success or a -ve error code.
80  */
81
82 static int read_block_bitmap (struct super_block * sb,
83                                unsigned int block_group,
84                                unsigned long bitmap_nr)
85 {
86         struct ext3_group_desc * gdp;
87         struct buffer_head * bh = NULL;
88         int retval = -EIO;
89         
90         gdp = ext3_get_group_desc (sb, block_group, NULL);
91         if (!gdp)
92                 goto error_out;
93         retval = 0;
94         bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
95         if (!bh) {
96                 ext3_error (sb, "read_block_bitmap",
97                             "Cannot read block bitmap - "
98                             "block_group = %d, block_bitmap = %lu",
99                             block_group, (unsigned long) gdp->bg_block_bitmap);
100                 retval = -EIO;
101         }
102         /*
103          * On IO error, just leave a zero in the superblock's block pointer for
104          * this group.  The IO will be retried next time.
105          */
106 error_out:
107         sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
108         sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
109         return retval;
110 }
111
112 /*
113  * load_block_bitmap loads the block bitmap for a blocks group
114  *
115  * It maintains a cache for the last bitmaps loaded.  This cache is managed
116  * with a LRU algorithm.
117  *
118  * Notes:
119  * 1/ There is one cache per mounted file system.
120  * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
121  *    this function reads the bitmap without maintaining a LRU cache.
122  * 
123  * Return the slot used to store the bitmap, or a -ve error code.
124  */
125 static int __load_block_bitmap (struct super_block * sb,
126                                 unsigned int block_group)
127 {
128         int i, j, retval = 0;
129         unsigned long block_bitmap_number;
130         struct buffer_head * block_bitmap;
131
132         if (block_group >= sb->u.ext3_sb.s_groups_count)
133                 ext3_panic (sb, "load_block_bitmap",
134                             "block_group >= groups_count - "
135                             "block_group = %d, groups_count = %lu",
136                             block_group, sb->u.ext3_sb.s_groups_count);
137
138         if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
139                 if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
140                         if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
141                             block_group)
142                                 return block_group;
143                         ext3_error (sb, "__load_block_bitmap",
144                                     "block_group != block_bitmap_number");
145                 }
146                 retval = read_block_bitmap (sb, block_group, block_group);
147                 if (retval < 0)
148                         return retval;
149                 return block_group;
150         }
151
152         for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
153                     sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
154                 ;
155         if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
156             sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
157                 block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
158                 block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
159                 for (j = i; j > 0; j--) {
160                         sb->u.ext3_sb.s_block_bitmap_number[j] =
161                                 sb->u.ext3_sb.s_block_bitmap_number[j - 1];
162                         sb->u.ext3_sb.s_block_bitmap[j] =
163                                 sb->u.ext3_sb.s_block_bitmap[j - 1];
164                 }
165                 sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
166                 sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
167
168                 /*
169                  * There's still one special case here --- if block_bitmap == 0
170                  * then our last attempt to read the bitmap failed and we have
171                  * just ended up caching that failure.  Try again to read it.
172                  */
173                 if (!block_bitmap)
174                         retval = read_block_bitmap (sb, block_group, 0);
175         } else {
176                 if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
177                         sb->u.ext3_sb.s_loaded_block_bitmaps++;
178                 else
179                         brelse (sb->u.ext3_sb.s_block_bitmap
180                                         [EXT3_MAX_GROUP_LOADED - 1]);
181                 for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
182                                         j > 0;  j--) {
183                         sb->u.ext3_sb.s_block_bitmap_number[j] =
184                                 sb->u.ext3_sb.s_block_bitmap_number[j - 1];
185                         sb->u.ext3_sb.s_block_bitmap[j] =
186                                 sb->u.ext3_sb.s_block_bitmap[j - 1];
187                 }
188                 retval = read_block_bitmap (sb, block_group, 0);
189         }
190         return retval;
191 }
192
193 /*
194  * Load the block bitmap for a given block group.  First of all do a couple
195  * of fast lookups for common cases and then pass the request onto the guts
196  * of the bitmap loader.
197  *
198  * Return the slot number of the group in the superblock bitmap cache's on
199  * success, or a -ve error code.
200  *
201  * There is still one inconsistency here --- if the number of groups in this
202  * filesystems is <= EXT3_MAX_GROUP_LOADED, then we have no way of 
203  * differentiating between a group for which we have never performed a bitmap
204  * IO request, and a group for which the last bitmap read request failed.
205  */
206 static inline int load_block_bitmap (struct super_block * sb,
207                                      unsigned int block_group)
208 {
209         int slot;
210         
211         /*
212          * Do the lookup for the slot.  First of all, check if we're asking
213          * for the same slot as last time, and did we succeed that last time?
214          */
215         if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
216             sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
217             sb->u.ext3_sb.s_block_bitmap[0]) {
218                 return 0;
219         }
220         /*
221          * Or can we do a fast lookup based on a loaded group on a filesystem
222          * small enough to be mapped directly into the superblock?
223          */
224         else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && 
225                  sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
226                         && sb->u.ext3_sb.s_block_bitmap[block_group]) {
227                 slot = block_group;
228         }
229         /*
230          * If not, then do a full lookup for this block group.
231          */
232         else {
233                 slot = __load_block_bitmap (sb, block_group);
234         }
235
236         /*
237          * <0 means we just got an error
238          */
239         if (slot < 0)
240                 return slot;
241         
242         /*
243          * If it's a valid slot, we may still have cached a previous IO error,
244          * in which case the bh in the superblock cache will be zero.
245          */
246         if (!sb->u.ext3_sb.s_block_bitmap[slot])
247                 return -EIO;
248         
249         /*
250          * Must have been read in OK to get this far.
251          */
252         return slot;
253 }
254
255 /* Free given blocks, update quota and i_blocks field */
256 void ext3_free_blocks (handle_t *handle, struct inode * inode,
257                         unsigned long block, unsigned long count)
258 {
259         struct buffer_head *bitmap_bh;
260         struct buffer_head *gd_bh;
261         unsigned long block_group;
262         unsigned long bit;
263         unsigned long i;
264         int bitmap_nr;
265         unsigned long overflow;
266         struct super_block * sb;
267         struct ext3_group_desc * gdp;
268         struct ext3_super_block * es;
269         int err = 0, ret;
270         int dquot_freed_blocks = 0;
271
272         sb = inode->i_sb;
273         if (!sb) {
274                 printk ("ext3_free_blocks: nonexistent device");
275                 return;
276         }
277         lock_super (sb);
278         es = sb->u.ext3_sb.s_es;
279         if (block < le32_to_cpu(es->s_first_data_block) || 
280             (block + count) > le32_to_cpu(es->s_blocks_count)) {
281                 ext3_error (sb, "ext3_free_blocks",
282                             "Freeing blocks not in datazone - "
283                             "block = %lu, count = %lu", block, count);
284                 goto error_return;
285         }
286
287         ext3_debug ("freeing block %lu\n", block);
288
289 do_more:
290         overflow = 0;
291         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
292                       EXT3_BLOCKS_PER_GROUP(sb);
293         bit = (block - le32_to_cpu(es->s_first_data_block)) %
294                       EXT3_BLOCKS_PER_GROUP(sb);
295         /*
296          * Check to see if we are freeing blocks across a group
297          * boundary.
298          */
299         if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
300                 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
301                 count -= overflow;
302         }
303         bitmap_nr = load_block_bitmap (sb, block_group);
304         if (bitmap_nr < 0)
305                 goto error_return;
306         
307         bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
308         gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
309         if (!gdp)
310                 goto error_return;
311
312         if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
313             in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
314             in_range (block, le32_to_cpu(gdp->bg_inode_table),
315                       sb->u.ext3_sb.s_itb_per_group) ||
316             in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
317                       sb->u.ext3_sb.s_itb_per_group))
318                 ext3_error (sb, "ext3_free_blocks",
319                             "Freeing blocks in system zones - "
320                             "Block = %lu, count = %lu",
321                             block, count);
322
323         /*
324          * We are about to start releasing blocks in the bitmap,
325          * so we need undo access.
326          */
327         /* @@@ check errors */
328         BUFFER_TRACE(bitmap_bh, "getting undo access");
329         err = ext3_journal_get_undo_access(handle, bitmap_bh);
330         if (err)
331                 goto error_return;
332         
333         /*
334          * We are about to modify some metadata.  Call the journal APIs
335          * to unshare ->b_data if a currently-committing transaction is
336          * using it
337          */
338         BUFFER_TRACE(gd_bh, "get_write_access");
339         err = ext3_journal_get_write_access(handle, gd_bh);     
340         if (err)
341                 goto error_return;
342
343         BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
344         err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
345         if (err)
346                 goto error_return;
347
348         for (i = 0; i < count; i++) {
349                 /*
350                  * An HJ special.  This is expensive...
351                  */
352 #ifdef CONFIG_JBD_DEBUG
353                 {
354                         struct buffer_head *debug_bh;
355                         debug_bh = sb_get_hash_table(sb, block + i);
356                         if (debug_bh) {
357                                 BUFFER_TRACE(debug_bh, "Deleted!");
358                                 if (!bh2jh(bitmap_bh)->b_committed_data)
359                                         BUFFER_TRACE(debug_bh,
360                                                 "No commited data in bitmap");
361                                 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
362                                 __brelse(debug_bh);
363                         }
364                 }
365 #endif
366                 BUFFER_TRACE(bitmap_bh, "clear bit");
367                 if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
368                         ext3_error (sb, __FUNCTION__,
369                                       "bit already cleared for block %lu", 
370                                       block + i);
371                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
372                 } else {
373                         dquot_freed_blocks++;
374                         gdp->bg_free_blocks_count =
375                           cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
376                         es->s_free_blocks_count =
377                           cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
378                 }
379                 /* @@@ This prevents newly-allocated data from being
380                  * freed and then reallocated within the same
381                  * transaction. 
382                  * 
383                  * Ideally we would want to allow that to happen, but to
384                  * do so requires making journal_forget() capable of
385                  * revoking the queued write of a data block, which
386                  * implies blocking on the journal lock.  *forget()
387                  * cannot block due to truncate races.
388                  *
389                  * Eventually we can fix this by making journal_forget()
390                  * return a status indicating whether or not it was able
391                  * to revoke the buffer.  On successful revoke, it is
392                  * safe not to set the allocation bit in the committed
393                  * bitmap, because we know that there is no outstanding
394                  * activity on the buffer any more and so it is safe to
395                  * reallocate it.  
396                  */
397                 BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
398                 J_ASSERT_BH(bitmap_bh,
399                                 bh2jh(bitmap_bh)->b_committed_data != NULL);
400                 ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
401         }
402
403         /* We dirtied the bitmap block */
404         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
405         err = ext3_journal_dirty_metadata(handle, bitmap_bh);
406
407         /* And the group descriptor block */
408         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
409         ret = ext3_journal_dirty_metadata(handle, gd_bh);
410         if (!err) err = ret;
411
412         /* And the superblock */
413         BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
414         ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
415         if (!err) err = ret;
416
417         if (overflow && !err) {
418                 block += count;
419                 count = overflow;
420                 goto do_more;
421         }
422         sb->s_dirt = 1;
423 error_return:
424         ext3_std_error(sb, err);
425         unlock_super(sb);
426         if (dquot_freed_blocks)
427                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
428         return;
429 }
430
431 /* For ext3 allocations, we must not reuse any blocks which are
432  * allocated in the bitmap buffer's "last committed data" copy.  This
433  * prevents deletes from freeing up the page for reuse until we have
434  * committed the delete transaction.
435  *
436  * If we didn't do this, then deleting something and reallocating it as
437  * data would allow the old block to be overwritten before the
438  * transaction committed (because we force data to disk before commit).
439  * This would lead to corruption if we crashed between overwriting the
440  * data and committing the delete. 
441  *
442  * @@@ We may want to make this allocation behaviour conditional on
443  * data-writes at some point, and disable it for metadata allocations or
444  * sync-data inodes.
445  */
446 static int ext3_test_allocatable(int nr, struct buffer_head *bh)
447 {
448         if (ext3_test_bit(nr, bh->b_data))
449                 return 0;
450         if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
451                 return 1;
452         return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
453 }
454
455 /*
456  * Find an allocatable block in a bitmap.  We honour both the bitmap and
457  * its last-committed copy (if that exists), and perform the "most
458  * appropriate allocation" algorithm of looking for a free block near
459  * the initial goal; then for a free byte somewhere in the bitmap; then
460  * for any free bit in the bitmap.
461  */
462 static int find_next_usable_block(int start,
463                         struct buffer_head *bh, int maxblocks)
464 {
465         int here, next;
466         char *p, *r;
467         
468         if (start > 0) {
469                 /*
470                  * The goal was occupied; search forward for a free 
471                  * block within the next XX blocks.
472                  *
473                  * end_goal is more or less random, but it has to be
474                  * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
475                  * next 64-bit boundary is simple..
476                  */
477                 int end_goal = (start + 63) & ~63;
478                 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
479                 if (here < end_goal && ext3_test_allocatable(here, bh))
480                         return here;
481                 
482                 ext3_debug ("Bit not found near goal\n");
483                 
484         }
485         
486         here = start;
487         if (here < 0)
488                 here = 0;
489         
490         /*
491          * There has been no free block found in the near vicinity of
492          * the goal: do a search forward through the block groups,
493          * searching in each group first for an entire free byte in the
494          * bitmap and then for any free bit.
495          * 
496          * Search first in the remainder of the current group 
497          */
498         p = ((char *) bh->b_data) + (here >> 3);
499         r = memscan(p, 0, (maxblocks - here + 7) >> 3);
500         next = (r - ((char *) bh->b_data)) << 3;
501         
502         if (next < maxblocks && ext3_test_allocatable(next, bh))
503                 return next;
504         
505         /* The bitmap search --- search forward alternately
506          * through the actual bitmap and the last-committed copy
507          * until we find a bit free in both. */
508
509         while (here < maxblocks) {
510                 next  = ext3_find_next_zero_bit ((unsigned long *) bh->b_data, 
511                                                  maxblocks, here);
512                 if (next >= maxblocks)
513                         return -1;
514                 if (ext3_test_allocatable(next, bh))
515                         return next;
516
517                 J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);
518                 here = ext3_find_next_zero_bit
519                         ((unsigned long *) bh2jh(bh)->b_committed_data, 
520                          maxblocks, next);
521         }
522         return -1;
523 }
524
525 /*
526  * ext3_new_block uses a goal block to assist allocation.  If the goal is
527  * free, or there is a free block within 32 blocks of the goal, that block
528  * is allocated.  Otherwise a forward search is made for a free block; within 
529  * each block group the search first looks for an entire free byte in the block
530  * bitmap, and then for any free bit if that fails.
531  * This function also updates quota and i_blocks field.
532  */
533 int ext3_new_block (handle_t *handle, struct inode * inode,
534                 unsigned long goal, u32 * prealloc_count,
535                 u32 * prealloc_block, int * errp)
536 {
537         struct buffer_head * bh, *bhtmp;
538         struct buffer_head * bh2;
539 #if 0
540         char * p, * r;
541 #endif
542         int i, j, k, tmp, alloctmp;
543         int bitmap_nr;
544         int fatal = 0, err;
545         int performed_allocation = 0;
546         struct super_block * sb;
547         struct ext3_group_desc * gdp;
548         struct ext3_super_block * es;
549 #ifdef EXT3FS_DEBUG
550         static int goal_hits = 0, goal_attempts = 0;
551 #endif
552         *errp = -ENOSPC;
553         sb = inode->i_sb;
554         if (!sb) {
555                 printk ("ext3_new_block: nonexistent device");
556                 return 0;
557         }
558
559         /*
560          * Check quota for allocation of this block.
561          */
562         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
563                 *errp = -EDQUOT;
564                 return 0;
565         }
566
567         lock_super (sb);
568         es = sb->u.ext3_sb.s_es;
569         if (le32_to_cpu(es->s_free_blocks_count) <=
570                         le32_to_cpu(es->s_r_blocks_count) &&
571             ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
572              (sb->u.ext3_sb.s_resgid == 0 ||
573               !in_group_p (sb->u.ext3_sb.s_resgid)) && 
574              !capable(CAP_SYS_RESOURCE)))
575                 goto out;
576
577         ext3_debug ("goal=%lu.\n", goal);
578
579         /*
580          * First, test whether the goal block is free.
581          */
582         if (goal < le32_to_cpu(es->s_first_data_block) ||
583             goal >= le32_to_cpu(es->s_blocks_count))
584                 goal = le32_to_cpu(es->s_first_data_block);
585         i = (goal - le32_to_cpu(es->s_first_data_block)) /
586                         EXT3_BLOCKS_PER_GROUP(sb);
587         gdp = ext3_get_group_desc (sb, i, &bh2);
588         if (!gdp)
589                 goto io_error;
590
591         if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
592                 j = ((goal - le32_to_cpu(es->s_first_data_block)) %
593                                 EXT3_BLOCKS_PER_GROUP(sb));
594 #ifdef EXT3FS_DEBUG
595                 if (j)
596                         goal_attempts++;
597 #endif
598                 bitmap_nr = load_block_bitmap (sb, i);
599                 if (bitmap_nr < 0)
600                         goto io_error;
601                 
602                 bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
603
604                 ext3_debug ("goal is at %d:%d.\n", i, j);
605
606                 if (ext3_test_allocatable(j, bh)) {
607 #ifdef EXT3FS_DEBUG
608                         goal_hits++;
609                         ext3_debug ("goal bit allocated.\n");
610 #endif
611                         goto got_block;
612                 }
613
614                 j = find_next_usable_block(j, bh, EXT3_BLOCKS_PER_GROUP(sb));
615                 if (j >= 0)
616                         goto search_back;
617         }
618
619         ext3_debug ("Bit not found in block group %d.\n", i);
620
621         /*
622          * Now search the rest of the groups.  We assume that 
623          * i and gdp correctly point to the last group visited.
624          */
625         for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
626                 i++;
627                 if (i >= sb->u.ext3_sb.s_groups_count)
628                         i = 0;
629                 gdp = ext3_get_group_desc (sb, i, &bh2);
630                 if (!gdp) {
631                         *errp = -EIO;
632                         goto out;
633                 }
634                 if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
635                         bitmap_nr = load_block_bitmap (sb, i);
636                         if (bitmap_nr < 0)
637                                 goto io_error;
638         
639                         bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
640                         j = find_next_usable_block(-1, bh, 
641                                                    EXT3_BLOCKS_PER_GROUP(sb));
642                         if (j >= 0) 
643                                 goto search_back;
644                 }
645         }
646
647         /* No space left on the device */
648         goto out;
649
650 search_back:
651         /* 
652          * We have succeeded in finding a free byte in the block
653          * bitmap.  Now search backwards up to 7 bits to find the
654          * start of this group of free blocks.
655          */
656         for (   k = 0;
657                 k < 7 && j > 0 && ext3_test_allocatable(j - 1, bh);
658                 k++, j--)
659                 ;
660         
661 got_block:
662
663         ext3_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
664
665         /* Make sure we use undo access for the bitmap, because it is
666            critical that we do the frozen_data COW on bitmap buffers in
667            all cases even if the buffer is in BJ_Forget state in the
668            committing transaction.  */
669         BUFFER_TRACE(bh, "get undo access for marking new block");
670         fatal = ext3_journal_get_undo_access(handle, bh);
671         if (fatal) goto out;
672         
673         BUFFER_TRACE(bh2, "get_write_access");
674         fatal = ext3_journal_get_write_access(handle, bh2);
675         if (fatal) goto out;
676
677         BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
678         fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
679         if (fatal) goto out;
680
681         tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
682                                 + le32_to_cpu(es->s_first_data_block);
683
684         if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
685             tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
686             in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
687                       sb->u.ext3_sb.s_itb_per_group))
688                 ext3_error (sb, "ext3_new_block",
689                             "Allocating block in system zone - "
690                             "block = %u", tmp);
691
692         /* The superblock lock should guard against anybody else beating
693          * us to this point! */
694         J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data));
695         BUFFER_TRACE(bh, "setting bitmap bit");
696         ext3_set_bit(j, bh->b_data);
697         performed_allocation = 1;
698
699 #ifdef CONFIG_JBD_DEBUG
700         {
701                 struct buffer_head *debug_bh;
702
703                 /* Record bitmap buffer state in the newly allocated block */
704                 debug_bh = sb_get_hash_table(sb, tmp);
705                 if (debug_bh) {
706                         BUFFER_TRACE(debug_bh, "state when allocated");
707                         BUFFER_TRACE2(debug_bh, bh, "bitmap state");
708                         brelse(debug_bh);
709                 }
710         }
711 #endif
712         if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
713                 J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
714         bhtmp = bh;
715         alloctmp = j;
716
717         ext3_debug ("found bit %d\n", j);
718
719         /*
720          * Do block preallocation now if required.
721          */
722 #ifdef EXT3_PREALLOCATE
723         /*
724          * akpm: this is not enabled for ext3.  Need to use
725          * ext3_test_allocatable()
726          */
727         /* Writer: ->i_prealloc* */
728         if (prealloc_count && !*prealloc_count) {
729                 int     prealloc_goal;
730                 unsigned long next_block = tmp + 1;
731
732                 prealloc_goal = es->s_prealloc_blocks ?
733                         es->s_prealloc_blocks : EXT3_DEFAULT_PREALLOC_BLOCKS;
734
735                 *prealloc_block = next_block;
736                 /* Writer: end */
737                 for (k = 1;
738                      k < prealloc_goal && (j + k) < EXT3_BLOCKS_PER_GROUP(sb);
739                      k++, next_block++) {
740                         if (DQUOT_PREALLOC_BLOCK(inode, 1))
741                                 break;
742                         /* Writer: ->i_prealloc* */
743                         if (*prealloc_block + *prealloc_count != next_block ||
744                             ext3_set_bit (j + k, bh->b_data)) {
745                                 /* Writer: end */
746                                 DQUOT_FREE_BLOCK(inode, 1);
747                                 break;
748                         }
749                         (*prealloc_count)++;
750                         /* Writer: end */
751                 }       
752                 /*
753                  * As soon as we go for per-group spinlocks we'll need these
754                  * done inside the loop above.
755                  */
756                 gdp->bg_free_blocks_count =
757                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
758                                (k - 1));
759                 es->s_free_blocks_count =
760                         cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) -
761                                (k - 1));
762                 ext3_debug ("Preallocated a further %lu bits.\n",
763                                (k - 1));
764         }
765 #endif
766
767         j = tmp;
768
769         BUFFER_TRACE(bh, "journal_dirty_metadata for bitmap block");
770         err = ext3_journal_dirty_metadata(handle, bh);
771         if (!fatal) fatal = err;
772         
773         if (j >= le32_to_cpu(es->s_blocks_count)) {
774                 ext3_error (sb, "ext3_new_block",
775                             "block(%d) >= blocks count(%d) - "
776                             "block_group = %d, es == %p ",j,
777                         le32_to_cpu(es->s_blocks_count), i, es);
778                 goto out;
779         }
780
781         /*
782          * It is up to the caller to add the new buffer to a journal
783          * list of some description.  We don't know in advance whether
784          * the caller wants to use it as metadata or data.
785          */
786
787         ext3_debug ("allocating block %d. "
788                     "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
789
790         gdp->bg_free_blocks_count =
791                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
792         es->s_free_blocks_count =
793                         cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
794
795         BUFFER_TRACE(bh2, "journal_dirty_metadata for group descriptor");
796         err = ext3_journal_dirty_metadata(handle, bh2);
797         if (!fatal) fatal = err;
798         
799         BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
800         err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
801         if (!fatal) fatal = err;
802
803         sb->s_dirt = 1;
804         if (fatal)
805                 goto out;
806
807         unlock_super (sb);
808         *errp = 0;
809         return j;
810         
811 io_error:
812         *errp = -EIO;
813 out:
814         if (fatal) {
815                 *errp = fatal;
816                 ext3_std_error(sb, fatal);
817         }
818         unlock_super (sb);
819         /*
820          * Undo the block allocation
821          */
822         if (!performed_allocation)
823                 DQUOT_FREE_BLOCK(inode, 1);
824         return 0;
825         
826 }
827
828 unsigned long ext3_count_free_blocks (struct super_block * sb)
829 {
830 #ifdef EXT3FS_DEBUG
831         struct ext3_super_block * es;
832         unsigned long desc_count, bitmap_count, x;
833         int bitmap_nr;
834         struct ext3_group_desc * gdp;
835         int i;
836         
837         lock_super (sb);
838         es = sb->u.ext3_sb.s_es;
839         desc_count = 0;
840         bitmap_count = 0;
841         gdp = NULL;
842         for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
843                 gdp = ext3_get_group_desc (sb, i, NULL);
844                 if (!gdp)
845                         continue;
846                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
847                 bitmap_nr = load_block_bitmap (sb, i);
848                 if (bitmap_nr < 0)
849                         continue;
850                 
851                 x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
852                                      sb->s_blocksize);
853                 printk ("group %d: stored = %d, counted = %lu\n",
854                         i, le16_to_cpu(gdp->bg_free_blocks_count), x);
855                 bitmap_count += x;
856         }
857         printk("ext3_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
858                le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
859         unlock_super (sb);
860         return bitmap_count;
861 #else
862         return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
863 #endif
864 }
865
866 static inline int block_in_use (unsigned long block,
867                                 struct super_block * sb,
868                                 unsigned char * map)
869 {
870         return ext3_test_bit ((block -
871                 le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
872                          EXT3_BLOCKS_PER_GROUP(sb), map);
873 }
874
/*
 * Return 1 if @a is 0 or can be reduced to 1 by repeated exact division
 * by @b (i.e. @a is a power of @b, including b^0 == 1); otherwise 0.
 */
static inline int test_root(int a, int b)
{
	if (a == 0)
		return 1;

	/* Strip factors of b until only 1 is left or a remainder shows up */
	for (; a != 1; a /= b) {
		if (a % b != 0)
			return 0;
	}
	return 1;
}
887
/*
 * Return 1 if test_root() accepts @group for base 3, 5 or 7
 * (which includes groups 0 and 1), otherwise 0.
 */
int ext3_group_sparse(int group)
{
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7);
}
893
894 /**
895  *      ext3_bg_has_super - number of blocks used by the superblock in group
896  *      @sb: superblock for filesystem
897  *      @group: group number to check
898  *
899  *      Return the number of blocks used by the superblock (primary or backup)
900  *      in this group.  Currently this will be only 0 or 1.
901  */
902 int ext3_bg_has_super(struct super_block *sb, int group)
903 {
904         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
905             !ext3_group_sparse(group))
906                 return 0;
907         return 1;
908 }
909
910 /**
911  *      ext3_bg_num_gdb - number of blocks used by the group table in group
912  *      @sb: superblock for filesystem
913  *      @group: group number to check
914  *
915  *      Return the number of blocks used by the group descriptor table
916  *      (primary or backup) in this group.  In the future there may be a
917  *      different number of descriptor blocks in each group.
918  */
919 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
920 {
921         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
922             !ext3_group_sparse(group))
923                 return 0;
924         return EXT3_SB(sb)->s_gdb_count;
925 }
926
#ifdef CONFIG_EXT3_CHECK
/*
 * ext3_check_blocks_bitmap - consistency check of the block bitmaps
 * @sb: superblock of the filesystem
 *
 * Called at mount-time, super-block is locked.
 *
 * For every group whose descriptor and block bitmap can be obtained,
 * verify that the blocks holding filesystem metadata are marked in use
 * (superblock copy, group descriptor blocks, block bitmap, inode bitmap
 * and inode table) and that the free count stored in the group descriptor
 * matches the number of free bits in the bitmap.  Finally compare the sum
 * of the per-group bitmap counts with the superblock's free blocks count.
 * Every mismatch is reported through ext3_error().
 */
void ext3_check_blocks_bitmap (struct super_block * sb)
{
	struct buffer_head * bh;
	struct ext3_super_block * es;
	unsigned long desc_count, bitmap_count, x, j;
	unsigned long desc_blocks;
	int bitmap_nr;
	struct ext3_group_desc * gdp;
	int i;

	es = sb->u.ext3_sb.s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;
	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
		gdp = ext3_get_group_desc (sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
		bitmap_nr = load_block_bitmap (sb, i);
		if (bitmap_nr < 0)
			continue;

		bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];

		/* Bit 0 is checked for the superblock copy, if the group has one */
		if (ext3_bg_has_super(sb, i) && !ext3_test_bit(0, bh->b_data))
			ext3_error(sb, __FUNCTION__,
				   "Superblock in group %d is marked free", i);

		/* Bits 1..desc_blocks are checked for the descriptor blocks */
		desc_blocks = ext3_bg_num_gdb(sb, i);
		for (j = 0; j < desc_blocks; j++)
			if (!ext3_test_bit(j + 1, bh->b_data))
				ext3_error(sb, __FUNCTION__,
					   "Descriptor block #%lu in group "
					   "%d is marked free", j, i);

		if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
						sb, bh->b_data))
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Block bitmap for group %d is marked free",
				    i);

		if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
						sb, bh->b_data))
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Inode bitmap for group %d is marked free",
				    i);

		/* j is unsigned long, so it must be printed with %lu */
		for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
			if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
							sb, bh->b_data))
				ext3_error (sb, "ext3_check_blocks_bitmap",
					    "Block #%lu of the inode table in "
					    "group %d is marked free", j, i);

		/* Compare the descriptor's count with the bitmap's free bits */
		x = ext3_count_free (bh, sb->s_blocksize);
		if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Wrong free blocks count for group %d, "
				    "stored = %d, counted = %lu", i,
				    le16_to_cpu(gdp->bg_free_blocks_count), x);
		bitmap_count += x;
	}
	if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
		ext3_error (sb, "ext3_check_blocks_bitmap",
			"Wrong free blocks count in super block, "
			"stored = %lu, counted = %lu",
			(unsigned long)le32_to_cpu(es->s_free_blocks_count),
			bitmap_count);
}
#endif