/*
 * Copyright (c) International Business Machines Corp., 2000-2002
 * Portions Copyright (c) Christoph Hellwig, 2001-2002
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/init.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
extern struct task_struct *jfsCommitTask;
static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
static wait_queue_head_t meta_wait;
#ifdef CONFIG_JFS_STATISTICS
struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
	uint	allocwait;	/* # of sleeping alloc_metapage() calls */
} mpStat;
#endif
#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;
static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
}
/*
 * Sleep until the metapage is unlocked, then take the lock.
 * Called with meta_lock held; drops and retakes it around schedule().
 */
static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}
/* Caller must hold meta_lock */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}
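
/*
 * Illustrative sketch (added note, not original source): the expected
 * caller pattern for these primitives.  meta_lock serializes hash and
 * flag updates; META_locked serializes access to the page contents.
 *
 *	spin_lock(&meta_lock);
 *	lock_metapage(mp);	// may drop/retake meta_lock while
 *				// sleeping in __lock_metapage()
 *	spin_unlock(&meta_lock);
 *	... use or modify mp->data ...
 *	unlock_metapage(mp);	// clears META_locked, wakes waiters
 */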
/*
 * metapage pool is based on Linux 2.5's mempool
 *
 * Tap into reserved structures in critical paths where waiting on a
 * memory allocation could cause deadlock
 */
#define METAPOOL_MIN_PAGES 32
static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
static int num_reserved = 0;
kmem_cache_t *metapage_cache;
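
/*
 * Added note (illustrative, not original source): the reserve behaves
 * like a tiny mempool.  alloc_metapage() tries an atomic slab
 * allocation first; if that fails and the caller must not sleep, it
 * pops one of the METAPOOL_MIN_PAGES preallocated structures;
 * otherwise it drops meta_lock and retries with GFP_NOFS.
 * __free_metapage() refills the reserve before handing structures
 * back to the slab cache.
 */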
static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->clsn = 0;
		mp->flag = 0;
		mp->data = NULL;
		mp->log = NULL;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}
static void empty_reserved(void)
{
	while (num_reserved > 0)
		kmem_cache_free(metapage_cache,
				reserved_metapages[--num_reserved]);
}
static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
{
	struct metapage *new;

	*dropped_lock = FALSE;

	/*
	 * Always try an atomic alloc first, to avoid dropping the
	 * spinlock
	 */
	new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
	if (new)
		return new;

	if (no_wait && num_reserved)
		return reserved_metapages[--num_reserved];

	*dropped_lock = TRUE;
	INCREMENT(mpStat.allocwait);

	spin_unlock(&meta_lock);
	new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
	spin_lock(&meta_lock);
	return new;
}
static void __free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	if (num_reserved < METAPOOL_MIN_PAGES)
		reserved_metapages[num_reserved++] = mp;
	else
		kmem_cache_free(metapage_cache, mp);
}
static inline void free_metapage(struct metapage * mp)
{
	spin_lock(&meta_lock);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
}
int __init metapage_init(void)
{
	struct metapage *mp;

	/*
	 * Initialize wait queue
	 */
	init_waitqueue_head(&meta_wait);

	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	while (num_reserved < METAPOOL_MIN_PAGES) {
		mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
		if (mp)
			reserved_metapages[num_reserved++] = mp;
		else {
			empty_reserved();
			kmem_cache_destroy(metapage_cache);
			return -ENOMEM;
		}
	}
	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}
void metapage_exit(void)
{
	empty_reserved();
	kmem_cache_destroy(metapage_cache);
}
/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}
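
/*
 * Worked example (illustrative, not original source): with
 * HASH_BITS = 10, i scales the mapping pointer down by the largest
 * power of two dividing sizeof(struct inode), so distinct inodes tend
 * to land in distinct buckets, and s() folds the bits above HASH_BITS
 * back into the low bits:
 *
 *	bucket = ((i + index) + ((i + index) >> 10)) & 1023
 */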
static struct metapage *search_hash(struct metapage ** hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}
static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}
static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int dropped_lock;
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	int no_wait;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jFYI(1, ("__get_metapage: inode = 0x%p, lblock = 0x%lx\n",
		 inode, lblock));

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else
		mapping = inode->i_mapping;

	spin_lock(&meta_lock);

	hash_ptr = meta_hash(mapping, lblock);

	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		if (test_bit(META_discard, &mp->flag)) {
			assert(new);	/* It's okay to reuse a discarded
					 * page if we expect it to be empty
					 */
			clear_bit(META_discard, &mp->flag);
		}
		mp->count++;
		jFYI(1, ("__get_metapage: found 0x%p, in hash\n", mp));
		assert(mp->logical_size == size);
		lock_metapage(mp);
		spin_unlock(&meta_lock);
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jERROR(1, ("MetaData crosses page boundary!!\n"));
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		if (JFS_IP(inode)->fileset == AGGREGATE_I)
			no_wait = 1;
		else
			no_wait = 0;

		mp = alloc_metapage(&dropped_lock, no_wait);
		if (dropped_lock) {
			/* alloc_metapage blocked, we need to search the
			 * hash again.
			 */
			struct metapage *mp2;
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				__free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jFYI(1,
			     ("__get_metapage: Calling grab_cache_page\n"));
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jERROR(1, ("grab_cache_page failed!\n"));
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				UnlockPage(mp->page);
			}
		} else {
			jFYI(1,
			     ("__get_metapage: Calling read_cache_page\n"));
			/* index in pages, not fs blocks */
			mp->page = read_cache_page(mapping, page_index,
				   (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jERROR(1, ("read_cache_page failed!\n"));
				mp->page = NULL;
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}
	jFYI(1, ("__get_metapage: returning = 0x%p\n", mp));
	return mp;

      freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}
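
/*
 * Usage sketch (illustrative; assumes the read_metapage()/get_metapage()
 * wrappers in jfs_metapage.h expand to __get_metapage(), which is how
 * callers normally reach this function):
 *
 *	struct metapage *mp = read_metapage(ip, lblock, PSIZE, 0);
 *	if (mp == NULL)
 *		return -EIO;
 *	... examine or modify mp->data ...
 *	release_metapage(mp);	// writes the page back if META_dirty
 */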
void hold_metapage(struct metapage * mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT(!test_bit(META_forced, &mp->flag));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}
static void __write_metapage(struct metapage * mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jFYI(1, ("__write_metapage: mp = 0x%p\n", mp));

	if (test_bit(META_discard, &mp->flag)) {
		/*
		 * This metadata is no longer valid
		 */
		clear_bit(META_dirty, &mp->flag);
		return;
	}

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset +
					       mp->logical_size);
	if (rc) {
		jERROR(1, ("prepare_write return %d!\n", rc));
		ClearPageUptodate(mp->page);
		UnlockPage(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset +
					      mp->logical_size);
	if (rc) {
		jERROR(1, ("commit_write returned %d\n", rc));
	}

	UnlockPage(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jFYI(1, ("__write_metapage done\n"));
}
static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page->buffers) {
		writeout_one_page(page);
		waitfor_one_page(page);
	}

	UnlockPage(page);
	page_cache_release(page);
}
void release_metapage(struct metapage * mp)
{
	struct jfs_log *log;

	jFYI(1,
	     ("release_metapage: mp = 0x%p, flag = 0x%lx\n", mp,
	      mp->flag));

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
	} else {
		remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
		spin_unlock(&meta_lock);

		if (mp->page) {
			kunmap(mp->page);
			mp->data = NULL;
			if (test_bit(META_dirty, &mp->flag))
				__write_metapage(mp);
			if (test_bit(META_sync, &mp->flag)) {
				sync_metapage(mp);
				clear_bit(META_sync, &mp->flag);
			}
			if (test_bit(META_discard, &mp->flag)) {
				lock_page(mp->page);
				block_flushpage(mp->page, 0);
				UnlockPage(mp->page);
			}
			page_cache_release(mp->page);
			INCREMENT(mpStat.pagefree);
		}

		if (mp->lsn) {
			/*
			 * Remove metapage from logsynclist.
			 */
			log = mp->log;
			LOGSYNC_LOCK(log);
			mp->log = NULL;
			mp->lsn = 0;
			mp->clsn = 0;
			log->count--;
			list_del(&mp->synclist);
			LOGSYNC_UNLOCK(log);
		}

		free_metapage(mp);
	}
	jFYI(1, ("release_metapage: done\n"));
}
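
/*
 * Lifecycle note (added, not original source): mp->count is a simple
 * reference count taken under meta_lock.  release_metapage() drops one
 * reference; the metapage is only unmapped, written back, unhashed,
 * and freed once the count reaches zero and no transaction still pins
 * it via nohomeok.
 */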
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
			lock_page(mp->page);
			block_flushpage(mp->page, 0);
			UnlockPage(mp->page);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping,
					      lblock >> l2BlocksPerPage);
			if (page) {
				block_flushpage(page, 0);
				UnlockPage(page);
				page_cache_release(page);
			}
		}
	}
}
#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n"
		       "allocation waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait,
		       mpStat.allocwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif