/*
 * more changes on original files
 * [linux-2.4.git] / fs / ext3 / balloc.c
 */
1 /*
2  *  linux/fs/ext3/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include <linux/sched.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/locks.h>
21 #include <linux/quotaops.h>
22
23 /*
24  * balloc.c contains the blocks allocation and deallocation routines
25  */
26
27 /*
28  * The free blocks are managed by bitmaps.  A file system contains several
29  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
30  * block for inodes, N blocks for the inode table and data blocks.
31  *
32  * The file system contains group descriptors which are located after the
33  * super block.  Each descriptor contains the number of the bitmap block and
34  * the free blocks count in the block.  The descriptors are loaded in memory
35  * when a file system is mounted (see ext3_read_super).
36  */
37
38
/* True iff block b lies within the len-block run starting at first.
 * NOTE: a macro -- each argument is evaluated more than once, so do not
 * pass expressions with side effects. */
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
40
41 struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42                                              unsigned int block_group,
43                                              struct buffer_head ** bh)
44 {
45         unsigned long group_desc;
46         unsigned long desc;
47         struct ext3_group_desc * gdp;
48
49         if (block_group >= sb->u.ext3_sb.s_groups_count) {
50                 ext3_error (sb, "ext3_get_group_desc",
51                             "block_group >= groups_count - "
52                             "block_group = %d, groups_count = %lu",
53                             block_group, sb->u.ext3_sb.s_groups_count);
54
55                 return NULL;
56         }
57         
58         group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
59         desc = block_group % EXT3_DESC_PER_BLOCK(sb);
60         if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
61                 ext3_error (sb, "ext3_get_group_desc",
62                             "Group descriptor not loaded - "
63                             "block_group = %d, group_desc = %lu, desc = %lu",
64                              block_group, group_desc, desc);
65                 return NULL;
66         }
67         
68         gdp = (struct ext3_group_desc *) 
69               sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
70         if (bh)
71                 *bh = sb->u.ext3_sb.s_group_desc[group_desc];
72         return gdp + desc;
73 }
74
75 /*
76  * Read the bitmap for a given block_group, reading into the specified 
77  * slot in the superblock's bitmap cache.
78  *
79  * Return >=0 on success or a -ve error code.
80  */
81
82 static int read_block_bitmap (struct super_block * sb,
83                                unsigned int block_group,
84                                unsigned long bitmap_nr)
85 {
86         struct ext3_group_desc * gdp;
87         struct buffer_head * bh = NULL;
88         int retval = -EIO;
89         
90         gdp = ext3_get_group_desc (sb, block_group, NULL);
91         if (!gdp)
92                 goto error_out;
93         retval = 0;
94         bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
95         if (!bh) {
96                 ext3_error (sb, "read_block_bitmap",
97                             "Cannot read block bitmap - "
98                             "block_group = %d, block_bitmap = %lu",
99                             block_group, (unsigned long) gdp->bg_block_bitmap);
100                 retval = -EIO;
101         }
102         /*
103          * On IO error, just leave a zero in the superblock's block pointer for
104          * this group.  The IO will be retried next time.
105          */
106 error_out:
107         sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
108         sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
109         return retval;
110 }
111
112 /*
113  * load_block_bitmap loads the block bitmap for a blocks group
114  *
115  * It maintains a cache for the last bitmaps loaded.  This cache is managed
116  * with a LRU algorithm.
117  *
118  * Notes:
119  * 1/ There is one cache per mounted file system.
120  * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
121  *    this function reads the bitmap without maintaining a LRU cache.
122  * 
123  * Return the slot used to store the bitmap, or a -ve error code.
124  */
static int __load_block_bitmap (struct super_block * sb,
                                unsigned int block_group)
{
        int i, j, retval = 0;
        unsigned long block_bitmap_number;
        struct buffer_head * block_bitmap;

        /* Callers are expected to have validated block_group already; an
         * out-of-range group here indicates in-memory corruption. */
        if (block_group >= sb->u.ext3_sb.s_groups_count)
                ext3_panic (sb, "load_block_bitmap",
                            "block_group >= groups_count - "
                            "block_group = %d, groups_count = %lu",
                            block_group, sb->u.ext3_sb.s_groups_count);

        /* Small filesystem: every group owns a dedicated cache slot
         * (slot number == group number), so no LRU management at all. */
        if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
                if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
                        if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
                            block_group)
                                return block_group;
                        ext3_error (sb, "__load_block_bitmap",
                                    "block_group != block_bitmap_number");
                }
                /* Slot empty (or inconsistent): (re)read into it. */
                retval = read_block_bitmap (sb, block_group, block_group);
                if (retval < 0)
                        return retval;
                return block_group;
        }

        /* LRU cache path: linear scan of the loaded slots for this group. */
        for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
                    sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
                ;
        if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
            sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
                /* Cache hit: promote slot i to the front (slot 0) by
                 * shifting slots 0..i-1 down one position. */
                block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
                block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
                for (j = i; j > 0; j--) {
                        sb->u.ext3_sb.s_block_bitmap_number[j] =
                                sb->u.ext3_sb.s_block_bitmap_number[j - 1];
                        sb->u.ext3_sb.s_block_bitmap[j] =
                                sb->u.ext3_sb.s_block_bitmap[j - 1];
                }
                sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
                sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;

                /*
                 * There's still one special case here --- if block_bitmap == 0
                 * then our last attempt to read the bitmap failed and we have
                 * just ended up caching that failure.  Try again to read it.
                 */
                if (!block_bitmap)
                        retval = read_block_bitmap (sb, block_group, 0);
        } else {
                /* Cache miss: grow the cache if room, otherwise evict the
                 * least recently used entry (the last slot), then shift
                 * everything down and read the new bitmap into slot 0. */
                if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
                        sb->u.ext3_sb.s_loaded_block_bitmaps++;
                else
                        brelse (sb->u.ext3_sb.s_block_bitmap
                                        [EXT3_MAX_GROUP_LOADED - 1]);
                for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
                                        j > 0;  j--) {
                        sb->u.ext3_sb.s_block_bitmap_number[j] =
                                sb->u.ext3_sb.s_block_bitmap_number[j - 1];
                        sb->u.ext3_sb.s_block_bitmap[j] =
                                sb->u.ext3_sb.s_block_bitmap[j - 1];
                }
                retval = read_block_bitmap (sb, block_group, 0);
        }
        /* Returns the slot used (0 on the LRU path) or a -ve error code. */
        return retval;
}
192
193 /*
194  * Load the block bitmap for a given block group.  First of all do a couple
195  * of fast lookups for common cases and then pass the request onto the guts
196  * of the bitmap loader.
197  *
198  * Return the slot number of the group in the superblock bitmap cache's on
199  * success, or a -ve error code.
200  *
201  * There is still one inconsistency here --- if the number of groups in this
202  * filesystems is <= EXT3_MAX_GROUP_LOADED, then we have no way of 
203  * differentiating between a group for which we have never performed a bitmap
204  * IO request, and a group for which the last bitmap read request failed.
205  */
206 static inline int load_block_bitmap (struct super_block * sb,
207                                      unsigned int block_group)
208 {
209         int slot;
210         
211         /*
212          * Do the lookup for the slot.  First of all, check if we're asking
213          * for the same slot as last time, and did we succeed that last time?
214          */
215         if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
216             sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
217             sb->u.ext3_sb.s_block_bitmap[0]) {
218                 return 0;
219         }
220         /*
221          * Or can we do a fast lookup based on a loaded group on a filesystem
222          * small enough to be mapped directly into the superblock?
223          */
224         else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && 
225                  sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
226                         && sb->u.ext3_sb.s_block_bitmap[block_group]) {
227                 slot = block_group;
228         }
229         /*
230          * If not, then do a full lookup for this block group.
231          */
232         else {
233                 slot = __load_block_bitmap (sb, block_group);
234         }
235
236         /*
237          * <0 means we just got an error
238          */
239         if (slot < 0)
240                 return slot;
241         
242         /*
243          * If it's a valid slot, we may still have cached a previous IO error,
244          * in which case the bh in the superblock cache will be zero.
245          */
246         if (!sb->u.ext3_sb.s_block_bitmap[slot])
247                 return -EIO;
248         
249         /*
250          * Must have been read in OK to get this far.
251          */
252         return slot;
253 }
254
/*
 * ext3_free_blocks() -- free @count on-disk blocks starting at @block.
 *
 * Clears the corresponding bits in the block bitmap, bumps the free-block
 * counters in the group descriptor and superblock, and credits quota for
 * every bit that was actually set.  All metadata changes go through the
 * journal via @handle.  Errors are reported through ext3_error()/
 * ext3_std_error(); the function returns void, so callers get no direct
 * failure indication.
 */
void ext3_free_blocks (handle_t *handle, struct inode * inode,
                        unsigned long block, unsigned long count)
{
        struct buffer_head *bitmap_bh;
        struct buffer_head *gd_bh;
        unsigned long block_group;
        unsigned long bit;
        unsigned long i;
        int bitmap_nr;
        unsigned long overflow;
        struct super_block * sb;
        struct ext3_group_desc * gdp;
        struct ext3_super_block * es;
        int err = 0, ret;
        int dquot_freed_blocks = 0;     /* bits actually cleared -> quota credit */

        sb = inode->i_sb;
        if (!sb) {
                printk ("ext3_free_blocks: nonexistent device");
                return;
        }
        lock_super (sb);
        es = sb->u.ext3_sb.s_es;
        /* Reject ranges outside the data zone, including arithmetic
         * wrap-around of block + count. */
        if (block < le32_to_cpu(es->s_first_data_block) ||
            block + count < block ||
            (block + count) > le32_to_cpu(es->s_blocks_count)) {
                ext3_error (sb, "ext3_free_blocks",
                            "Freeing blocks not in datazone - "
                            "block = %lu, count = %lu", block, count);
                goto error_return;
        }

        ext3_debug ("freeing block %lu\n", block);

do_more:
        overflow = 0;
        block_group = (block - le32_to_cpu(es->s_first_data_block)) /
                      EXT3_BLOCKS_PER_GROUP(sb);
        bit = (block - le32_to_cpu(es->s_first_data_block)) %
                      EXT3_BLOCKS_PER_GROUP(sb);
        /*
         * Check to see if we are freeing blocks across a group
         * boundary.  If so, free only up to the end of this group now
         * and loop back (via do_more) for the remainder.
         */
        if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
                overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
                count -= overflow;
        }
        bitmap_nr = load_block_bitmap (sb, block_group);
        if (bitmap_nr < 0)
                goto error_return;

        bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
        gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
        if (!gdp)
                goto error_return;

        /*
         * We are about to start releasing blocks in the bitmap,
         * so we need undo access.
         */
        /* @@@ check errors */
        BUFFER_TRACE(bitmap_bh, "getting undo access");
        err = ext3_journal_get_undo_access(handle, bitmap_bh);
        if (err)
                goto error_return;

        /*
         * We are about to modify some metadata.  Call the journal APIs
         * to unshare ->b_data if a currently-committing transaction is
         * using it
         */
        BUFFER_TRACE(gd_bh, "get_write_access");
        err = ext3_journal_get_write_access(handle, gd_bh);
        if (err)
                goto error_return;

        BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
        err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
        if (err)
                goto error_return;

        for (i = 0; i < count; i++, block++) {
                /* Never free filesystem metadata: the group's bitmaps and
                 * inode table live in this range.  Report and skip. */
                if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
                    block == le32_to_cpu(gdp->bg_inode_bitmap) ||
                    in_range(block, le32_to_cpu(gdp->bg_inode_table),
                             EXT3_SB(sb)->s_itb_per_group)) {
                        ext3_error(sb, __FUNCTION__,
                                   "Freeing block in system zone - block = %lu",
                                   block);
                        continue;
                }

                /*
                 * An HJ special.  This is expensive...
                 */
#ifdef CONFIG_JBD_DEBUG
                {
                        struct buffer_head *debug_bh;
                        debug_bh = sb_get_hash_table(sb, block);
                        if (debug_bh) {
                                BUFFER_TRACE(debug_bh, "Deleted!");
                                if (!bh2jh(bitmap_bh)->b_committed_data)
                                        BUFFER_TRACE(debug_bh,
                                                "No commited data in bitmap");
                                BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
                                __brelse(debug_bh);
                        }
                }
#endif
                BUFFER_TRACE(bitmap_bh, "clear bit");
                /* ext3_clear_bit() returns the old bit value: zero means
                 * the block was already free -- report, don't double-count. */
                if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
                        ext3_error(sb, __FUNCTION__,
                                   "bit already cleared for block %lu", block);
                        BUFFER_TRACE(bitmap_bh, "bit already cleared");
                } else {
                        dquot_freed_blocks++;
                        gdp->bg_free_blocks_count =
                          cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
                        es->s_free_blocks_count =
                          cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
                }
                /* @@@ This prevents newly-allocated data from being
                 * freed and then reallocated within the same
                 * transaction. 
                 * 
                 * Ideally we would want to allow that to happen, but to
                 * do so requires making journal_forget() capable of
                 * revoking the queued write of a data block, which
                 * implies blocking on the journal lock.  *forget()
                 * cannot block due to truncate races.
                 *
                 * Eventually we can fix this by making journal_forget()
                 * return a status indicating whether or not it was able
                 * to revoke the buffer.  On successful revoke, it is
                 * safe not to set the allocation bit in the committed
                 * bitmap, because we know that there is no outstanding
                 * activity on the buffer any more and so it is safe to
                 * reallocate it.  
                 */
                BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
                J_ASSERT_BH(bitmap_bh,
                                bh2jh(bitmap_bh)->b_committed_data != NULL);
                ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
        }

        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext3_journal_dirty_metadata(handle, bitmap_bh);

        /* And the group descriptor block */
        BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
        ret = ext3_journal_dirty_metadata(handle, gd_bh);
        if (!err) err = ret;

        /* And the superblock */
        BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
        ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
        if (!err) err = ret;

        /* If the range crossed a group boundary, go round again for the
         * leftover blocks in the next group. */
        if (overflow && !err) {
                count = overflow;
                goto do_more;
        }
        sb->s_dirt = 1;
error_return:
        ext3_std_error(sb, err);
        unlock_super(sb);
        /* Quota is credited outside the superblock lock. */
        if (dquot_freed_blocks)
                DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
        return;
}
428
429 /* For ext3 allocations, we must not reuse any blocks which are
430  * allocated in the bitmap buffer's "last committed data" copy.  This
431  * prevents deletes from freeing up the page for reuse until we have
432  * committed the delete transaction.
433  *
434  * If we didn't do this, then deleting something and reallocating it as
435  * data would allow the old block to be overwritten before the
436  * transaction committed (because we force data to disk before commit).
437  * This would lead to corruption if we crashed between overwriting the
438  * data and committing the delete. 
439  *
440  * @@@ We may want to make this allocation behaviour conditional on
441  * data-writes at some point, and disable it for metadata allocations or
442  * sync-data inodes.
443  */
444 static int ext3_test_allocatable(int nr, struct buffer_head *bh)
445 {
446         if (ext3_test_bit(nr, bh->b_data))
447                 return 0;
448         if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
449                 return 1;
450         return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
451 }
452
453 /*
454  * Find an allocatable block in a bitmap.  We honour both the bitmap and
455  * its last-committed copy (if that exists), and perform the "most
456  * appropriate allocation" algorithm of looking for a free block near
457  * the initial goal; then for a free byte somewhere in the bitmap; then
458  * for any free bit in the bitmap.
459  */
460 static int find_next_usable_block(int start,
461                         struct buffer_head *bh, int maxblocks)
462 {
463         int here, next;
464         char *p, *r;
465         
466         if (start > 0) {
467                 /*
468                  * The goal was occupied; search forward for a free 
469                  * block within the next XX blocks.
470                  *
471                  * end_goal is more or less random, but it has to be
472                  * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
473                  * next 64-bit boundary is simple..
474                  */
475                 int end_goal = (start + 63) & ~63;
476                 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
477                 if (here < end_goal && ext3_test_allocatable(here, bh))
478                         return here;
479                 
480                 ext3_debug ("Bit not found near goal\n");
481                 
482         }
483         
484         here = start;
485         if (here < 0)
486                 here = 0;
487         
488         /*
489          * There has been no free block found in the near vicinity of
490          * the goal: do a search forward through the block groups,
491          * searching in each group first for an entire free byte in the
492          * bitmap and then for any free bit.
493          * 
494          * Search first in the remainder of the current group 
495          */
496         p = ((char *) bh->b_data) + (here >> 3);
497         r = memscan(p, 0, (maxblocks - here + 7) >> 3);
498         next = (r - ((char *) bh->b_data)) << 3;
499         
500         if (next < maxblocks && ext3_test_allocatable(next, bh))
501                 return next;
502         
503         /* The bitmap search --- search forward alternately
504          * through the actual bitmap and the last-committed copy
505          * until we find a bit free in both. */
506
507         while (here < maxblocks) {
508                 next  = ext3_find_next_zero_bit ((unsigned long *) bh->b_data, 
509                                                  maxblocks, here);
510                 if (next >= maxblocks)
511                         return -1;
512                 if (ext3_test_allocatable(next, bh))
513                         return next;
514
515                 J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);
516                 here = ext3_find_next_zero_bit
517                         ((unsigned long *) bh2jh(bh)->b_committed_data, 
518                          maxblocks, next);
519         }
520         return -1;
521 }
522
523 /*
524  * ext3_new_block uses a goal block to assist allocation.  If the goal is
525  * free, or there is a free block within 32 blocks of the goal, that block
526  * is allocated.  Otherwise a forward search is made for a free block; within 
527  * each block group the search first looks for an entire free byte in the block
528  * bitmap, and then for any free bit if that fails.
529  * This function also updates quota and i_blocks field.
530  */
531 int ext3_new_block (handle_t *handle, struct inode * inode,
532                 unsigned long goal, u32 * prealloc_count,
533                 u32 * prealloc_block, int * errp)
534 {
535         struct buffer_head * bh, *bhtmp;
536         struct buffer_head * bh2;
537 #if 0
538         char * p, * r;
539 #endif
540         int i, j, k, tmp, alloctmp;
541         int bitmap_nr;
542         int fatal = 0, err;
543         int performed_allocation = 0;
544         struct super_block * sb;
545         struct ext3_group_desc * gdp;
546         struct ext3_super_block * es;
547 #ifdef EXT3FS_DEBUG
548         static int goal_hits = 0, goal_attempts = 0;
549 #endif
550         *errp = -ENOSPC;
551         sb = inode->i_sb;
552         if (!sb) {
553                 printk ("ext3_new_block: nonexistent device");
554                 return 0;
555         }
556
557         /*
558          * Check quota for allocation of this block.
559          */
560         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
561                 *errp = -EDQUOT;
562                 return 0;
563         }
564
565         lock_super (sb);
566         es = sb->u.ext3_sb.s_es;
567         if (le32_to_cpu(es->s_free_blocks_count) <=
568                         le32_to_cpu(es->s_r_blocks_count) &&
569             ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
570              (sb->u.ext3_sb.s_resgid == 0 ||
571               !in_group_p (sb->u.ext3_sb.s_resgid)) && 
572              !capable(CAP_SYS_RESOURCE)))
573                 goto out;
574
575         ext3_debug ("goal=%lu.\n", goal);
576
577 repeat:
578         /*
579          * First, test whether the goal block is free.
580          */
581         if (goal < le32_to_cpu(es->s_first_data_block) ||
582             goal >= le32_to_cpu(es->s_blocks_count))
583                 goal = le32_to_cpu(es->s_first_data_block);
584         i = (goal - le32_to_cpu(es->s_first_data_block)) /
585                         EXT3_BLOCKS_PER_GROUP(sb);
586         gdp = ext3_get_group_desc (sb, i, &bh2);
587         if (!gdp)
588                 goto io_error;
589
590         if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
591                 j = ((goal - le32_to_cpu(es->s_first_data_block)) %
592                                 EXT3_BLOCKS_PER_GROUP(sb));
593 #ifdef EXT3FS_DEBUG
594                 if (j)
595                         goal_attempts++;
596 #endif
597                 bitmap_nr = load_block_bitmap (sb, i);
598                 if (bitmap_nr < 0)
599                         goto io_error;
600                 
601                 bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
602
603                 ext3_debug ("goal is at %d:%d.\n", i, j);
604
605                 if (ext3_test_allocatable(j, bh)) {
606 #ifdef EXT3FS_DEBUG
607                         goal_hits++;
608                         ext3_debug ("goal bit allocated.\n");
609 #endif
610                         goto got_block;
611                 }
612
613                 j = find_next_usable_block(j, bh, EXT3_BLOCKS_PER_GROUP(sb));
614                 if (j >= 0)
615                         goto search_back;
616         }
617
618         ext3_debug ("Bit not found in block group %d.\n", i);
619
620         /*
621          * Now search the rest of the groups.  We assume that 
622          * i and gdp correctly point to the last group visited.
623          */
624         for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
625                 i++;
626                 if (i >= sb->u.ext3_sb.s_groups_count)
627                         i = 0;
628                 gdp = ext3_get_group_desc (sb, i, &bh2);
629                 if (!gdp) {
630                         *errp = -EIO;
631                         goto out;
632                 }
633                 if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
634                         bitmap_nr = load_block_bitmap (sb, i);
635                         if (bitmap_nr < 0)
636                                 goto io_error;
637         
638                         bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
639                         j = find_next_usable_block(-1, bh, 
640                                                    EXT3_BLOCKS_PER_GROUP(sb));
641                         if (j >= 0) 
642                                 goto search_back;
643                 }
644         }
645
646         /* No space left on the device */
647         goto out;
648
649 search_back:
650         /* 
651          * We have succeeded in finding a free byte in the block
652          * bitmap.  Now search backwards up to 7 bits to find the
653          * start of this group of free blocks.
654          */
655         for (   k = 0;
656                 k < 7 && j > 0 && ext3_test_allocatable(j - 1, bh);
657                 k++, j--)
658                 ;
659         
660 got_block:
661
662         ext3_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
663
664         /* Make sure we use undo access for the bitmap, because it is
665            critical that we do the frozen_data COW on bitmap buffers in
666            all cases even if the buffer is in BJ_Forget state in the
667            committing transaction.  */
668         BUFFER_TRACE(bh, "get undo access for marking new block");
669         fatal = ext3_journal_get_undo_access(handle, bh);
670         if (fatal) goto out;
671         
672         BUFFER_TRACE(bh2, "get_write_access");
673         fatal = ext3_journal_get_write_access(handle, bh2);
674         if (fatal) goto out;
675
676         BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
677         fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
678         if (fatal) goto out;
679
680         tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
681                                 + le32_to_cpu(es->s_first_data_block);
682
683         if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
684             tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
685             in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
686                       EXT3_SB(sb)->s_itb_per_group)) {
687                 ext3_error(sb, __FUNCTION__,
688                            "Allocating block in system zone - block = %u", tmp);
689
690                 /* Note: This will potentially use up one of the handle's
691                  * buffer credits.  Normally we have way too many credits,
692                  * so that is OK.  In _very_ rare cases it might not be OK.
693                  * We will trigger an assertion if we run out of credits,
694                  * and we will have to do a full fsck of the filesystem -
695                  * better than randomly corrupting filesystem metadata.
696                  */
697                 ext3_set_bit(j, bh->b_data);
698                 goto repeat;
699         }
700
701
702         /* The superblock lock should guard against anybody else beating
703          * us to this point! */
704         J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data));
705         BUFFER_TRACE(bh, "setting bitmap bit");
706         ext3_set_bit(j, bh->b_data);
707         performed_allocation = 1;
708
709 #ifdef CONFIG_JBD_DEBUG
710         {
711                 struct buffer_head *debug_bh;
712
713                 /* Record bitmap buffer state in the newly allocated block */
714                 debug_bh = sb_get_hash_table(sb, tmp);
715                 if (debug_bh) {
716                         BUFFER_TRACE(debug_bh, "state when allocated");
717                         BUFFER_TRACE2(debug_bh, bh, "bitmap state");
718                         brelse(debug_bh);
719                 }
720         }
721 #endif
722         if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
723                 J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
724         bhtmp = bh;
725         alloctmp = j;
726
727         ext3_debug ("found bit %d\n", j);
728
729         /*
730          * Do block preallocation now if required.
731          */
732 #ifdef EXT3_PREALLOCATE
733         /*
734          * akpm: this is not enabled for ext3.  Need to use
735          * ext3_test_allocatable()
736          */
737         /* Writer: ->i_prealloc* */
738         if (prealloc_count && !*prealloc_count) {
739                 int     prealloc_goal;
740                 unsigned long next_block = tmp + 1;
741
742                 prealloc_goal = es->s_prealloc_blocks ?
743                         es->s_prealloc_blocks : EXT3_DEFAULT_PREALLOC_BLOCKS;
744
745                 *prealloc_block = next_block;
746                 /* Writer: end */
747                 for (k = 1;
748                      k < prealloc_goal && (j + k) < EXT3_BLOCKS_PER_GROUP(sb);
749                      k++, next_block++) {
750                         if (DQUOT_PREALLOC_BLOCK(inode, 1))
751                                 break;
752                         /* Writer: ->i_prealloc* */
753                         if (*prealloc_block + *prealloc_count != next_block ||
754                             ext3_set_bit (j + k, bh->b_data)) {
755                                 /* Writer: end */
756                                 DQUOT_FREE_BLOCK(inode, 1);
757                                 break;
758                         }
759                         (*prealloc_count)++;
760                         /* Writer: end */
761                 }       
762                 /*
763                  * As soon as we go for per-group spinlocks we'll need these
764                  * done inside the loop above.
765                  */
766                 gdp->bg_free_blocks_count =
767                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
768                                (k - 1));
769                 es->s_free_blocks_count =
770                         cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) -
771                                (k - 1));
772                 ext3_debug ("Preallocated a further %lu bits.\n",
773                                (k - 1));
774         }
775 #endif
776
777         j = tmp;
778
779         BUFFER_TRACE(bh, "journal_dirty_metadata for bitmap block");
780         err = ext3_journal_dirty_metadata(handle, bh);
781         if (!fatal) fatal = err;
782         
783         if (j >= le32_to_cpu(es->s_blocks_count)) {
784                 ext3_error (sb, "ext3_new_block",
785                             "block(%d) >= blocks count(%d) - "
786                             "block_group = %d, es == %p ",j,
787                         le32_to_cpu(es->s_blocks_count), i, es);
788                 goto out;
789         }
790
791         /*
792          * It is up to the caller to add the new buffer to a journal
793          * list of some description.  We don't know in advance whether
794          * the caller wants to use it as metadata or data.
795          */
796
797         ext3_debug ("allocating block %d. "
798                     "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
799
800         gdp->bg_free_blocks_count =
801                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
802         es->s_free_blocks_count =
803                         cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
804
805         BUFFER_TRACE(bh2, "journal_dirty_metadata for group descriptor");
806         err = ext3_journal_dirty_metadata(handle, bh2);
807         if (!fatal) fatal = err;
808         
809         BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
810         err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
811         if (!fatal) fatal = err;
812
813         sb->s_dirt = 1;
814         if (fatal)
815                 goto out;
816
817         unlock_super (sb);
818         *errp = 0;
819         return j;
820         
821 io_error:
822         *errp = -EIO;
823 out:
824         if (fatal) {
825                 *errp = fatal;
826                 ext3_std_error(sb, fatal);
827         }
828         unlock_super (sb);
829         /*
830          * Undo the block allocation
831          */
832         if (!performed_allocation)
833                 DQUOT_FREE_BLOCK(inode, 1);
834         return 0;
835         
836 }
837
838 unsigned long ext3_count_free_blocks (struct super_block * sb)
839 {
840 #ifdef EXT3FS_DEBUG
841         struct ext3_super_block * es;
842         unsigned long desc_count, bitmap_count, x;
843         int bitmap_nr;
844         struct ext3_group_desc * gdp;
845         int i;
846         
847         lock_super (sb);
848         es = sb->u.ext3_sb.s_es;
849         desc_count = 0;
850         bitmap_count = 0;
851         gdp = NULL;
852         for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
853                 gdp = ext3_get_group_desc (sb, i, NULL);
854                 if (!gdp)
855                         continue;
856                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
857                 bitmap_nr = load_block_bitmap (sb, i);
858                 if (bitmap_nr < 0)
859                         continue;
860                 
861                 x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
862                                      sb->s_blocksize);
863                 printk ("group %d: stored = %d, counted = %lu\n",
864                         i, le16_to_cpu(gdp->bg_free_blocks_count), x);
865                 bitmap_count += x;
866         }
867         printk("ext3_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
868                le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
869         unlock_super (sb);
870         return bitmap_count;
871 #else
872         return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
873 #endif
874 }
875
876 static inline int block_in_use (unsigned long block,
877                                 struct super_block * sb,
878                                 unsigned char * map)
879 {
880         return ext3_test_bit ((block -
881                 le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
882                          EXT3_BLOCKS_PER_GROUP(sb), map);
883 }
884
/*
 * test_root() -- return 1 if @num is zero or an exact power of @base
 * (base^0 = 1 included), 0 otherwise.  Used to decide which groups
 * carry sparse superblock backups.
 */
static inline int test_root(int num, int base)
{
	if (num == 0)
		return 1;
	while (num != 1) {
		if (num % base != 0)
			return 0;
		num /= base;
	}
	return 1;
}
897
/*
 * ext3_group_sparse() -- return 1 if @group holds a sparse superblock
 * backup, i.e. if it is 0 or a power of 3, 5 or 7.
 */
int ext3_group_sparse(int group)
{
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7);
}
903
904 /**
905  *      ext3_bg_has_super - number of blocks used by the superblock in group
906  *      @sb: superblock for filesystem
907  *      @group: group number to check
908  *
909  *      Return the number of blocks used by the superblock (primary or backup)
910  *      in this group.  Currently this will be only 0 or 1.
911  */
912 int ext3_bg_has_super(struct super_block *sb, int group)
913 {
914         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
915             !ext3_group_sparse(group))
916                 return 0;
917         return 1;
918 }
919
920 /**
921  *      ext3_bg_num_gdb - number of blocks used by the group table in group
922  *      @sb: superblock for filesystem
923  *      @group: group number to check
924  *
925  *      Return the number of blocks used by the group descriptor table
926  *      (primary or backup) in this group.  In the future there may be a
927  *      different number of descriptor blocks in each group.
928  */
929 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
930 {
931         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
932             !ext3_group_sparse(group))
933                 return 0;
934         return EXT3_SB(sb)->s_gdb_count;
935 }
936
#ifdef CONFIG_EXT3_CHECK
/*
 * ext3_check_blocks_bitmap() -- sanity-check every group's block bitmap.
 * Called at mount-time, super-block is locked.
 *
 * For each group, verify that the metadata blocks (backup superblock,
 * group descriptor blocks, block/inode bitmaps, inode table) are marked
 * in use, and that the free-block count stored in the descriptor matches
 * the number of clear bits in the bitmap.  Finally compare the summed
 * bitmap counts against the superblock total.  Discrepancies are reported
 * via ext3_error() but not repaired.
 */
void ext3_check_blocks_bitmap (struct super_block * sb)
{
	struct buffer_head * bh;
	struct ext3_super_block * es;
	unsigned long desc_count, bitmap_count, x, j;
	unsigned long desc_blocks;
	int bitmap_nr;
	struct ext3_group_desc * gdp;
	int i;

	es = sb->u.ext3_sb.s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;
	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
		gdp = ext3_get_group_desc (sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
		bitmap_nr = load_block_bitmap (sb, i);
		if (bitmap_nr < 0)
			continue;

		bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];

		/* Bit 0 of a group's bitmap covers its (backup) superblock,
		 * when this group has one. */
		if (ext3_bg_has_super(sb, i) && !ext3_test_bit(0, bh->b_data))
			ext3_error(sb, __FUNCTION__,
				   "Superblock in group %d is marked free", i);

		/* Descriptor blocks immediately follow the superblock,
		 * hence bits 1..desc_blocks.  j is unsigned long, so it
		 * must be printed with %lu, not %d/%ld. */
		desc_blocks = ext3_bg_num_gdb(sb, i);
		for (j = 0; j < desc_blocks; j++)
			if (!ext3_test_bit(j + 1, bh->b_data))
				ext3_error(sb, __FUNCTION__,
					   "Descriptor block #%lu in group "
					   "%d is marked free", j, i);

		if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
						sb, bh->b_data))
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Block bitmap for group %d is marked free",
				    i);

		if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
						sb, bh->b_data))
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Inode bitmap for group %d is marked free",
				    i);

		for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
			if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
							sb, bh->b_data))
				ext3_error (sb, "ext3_check_blocks_bitmap",
					    "Block #%lu of the inode table in "
					    "group %d is marked free", j, i);

		x = ext3_count_free (bh, sb->s_blocksize);
		if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
			ext3_error (sb, "ext3_check_blocks_bitmap",
				    "Wrong free blocks count for group %d, "
				    "stored = %d, counted = %lu", i,
				    le16_to_cpu(gdp->bg_free_blocks_count), x);
		bitmap_count += x;
	}
	if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
		ext3_error (sb, "ext3_check_blocks_bitmap",
			"Wrong free blocks count in super block, "
			"stored = %lu, counted = %lu",
			(unsigned long)le32_to_cpu(es->s_free_blocks_count),
			bitmap_count);
}
#endif