fs/jfs/jfs_metapage.c

   1 /*
   2  *   Copyright (c) International Business Machines Corp., 2000-2002
   3  *   Portions Copyright (c) Christoph Hellwig, 2001-2002
   4  *
   5  *   This program is free software;  you can redistribute it and/or modify
   6  *   it under the terms of the GNU General Public License as published by
   7  *   the Free Software Foundation; either version 2 of the License, or
   8  *   (at your option) any later version.
   9  *
  10  *   This program is distributed in the hope that it will be useful,
  11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13  *   the GNU General Public License for more details.
  14  *
  15  *   You should have received a copy of the GNU General Public License
  16  *   along with this program;  if not, write to the Free Software
  17  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18  */
  19
  20 #include <linux/fs.h>
  21 #include <linux/init.h>
  22 #include "jfs_incore.h"
  23 #include "jfs_filsys.h"
  24 #include "jfs_metapage.h"
  25 #include "jfs_txnmgr.h"
  26 #include "jfs_debug.h"
  27
  28 extern struct task_struct *jfsCommitTask;
  29 static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
  30 static wait_queue_head_t meta_wait;
  31
  32 #ifdef CONFIG_JFS_STATISTICS
  33 struct {
  34         uint    pagealloc;      /* # of page allocations */
  35         uint    pagefree;       /* # of page frees */
  36         uint    lockwait;       /* # of sleeping lock_metapage() calls */
  37         uint    allocwait;      /* # of sleeping alloc_metapage() calls */
  38 } mpStat;
  39 #endif
  40
  41
  42 #define HASH_BITS 10            /* This makes hash_table 1 4K page */
  43 #define HASH_SIZE (1 << HASH_BITS)
  44 static struct metapage **hash_table = NULL;
  45 static unsigned long hash_order;
  46
  47
  48 static inline int metapage_locked(struct metapage *mp)
  49 {
  50         return test_bit(META_locked, &mp->flag);
  51 }
  52
  53 static inline int trylock_metapage(struct metapage *mp)
  54 {
  55         return test_and_set_bit(META_locked, &mp->flag);
  56 }
  57
  58 static inline void unlock_metapage(struct metapage *mp)
  59 {
  60         clear_bit(META_locked, &mp->flag);
  61         wake_up(&mp->wait);
  62 }
  63
  64 static void __lock_metapage(struct metapage *mp)
  65 {
  66         DECLARE_WAITQUEUE(wait, current);
  67
  68         INCREMENT(mpStat.lockwait);
  69
  70         add_wait_queue_exclusive(&mp->wait, &wait);
  71         do {
  72                 set_current_state(TASK_UNINTERRUPTIBLE);
  73                 if (metapage_locked(mp)) {
  74                         spin_unlock(&meta_lock);
  75                         schedule();
  76                         spin_lock(&meta_lock);
  77                 }
  78         } while (trylock_metapage(mp));
  79         __set_current_state(TASK_RUNNING);
  80         remove_wait_queue(&mp->wait, &wait);
  81 }
  82
  83 /* needs meta_lock */
  84 static inline void lock_metapage(struct metapage *mp)
  85 {
  86         if (trylock_metapage(mp))
  87                 __lock_metapage(mp);
  88 }
  89
  90 /*
  91  * metapage pool is based on Linux 2.5's mempool
  92  *
  93  * Tap into reserved structures in critical paths where waiting on a
  94  * memory allocation could cause deadlock
  95  */
  96 #define METAPOOL_MIN_PAGES 32
  97 static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
  98 static int num_reserved = 0;
  99 kmem_cache_t *metapage_cache;
 100
 101 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 102 {
 103         struct metapage *mp = (struct metapage *)foo;
 104
 105         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 106             SLAB_CTOR_CONSTRUCTOR) {
 107                 mp->lid = 0;
 108                 mp->lsn = 0;
 109                 mp->flag = 0;
 110                 mp->data = NULL;
 111                 mp->clsn = 0;
 112                 mp->log = NULL;
 113                 set_bit(META_free, &mp->flag);
 114                 init_waitqueue_head(&mp->wait);
 115         }
 116 }
 117
 118 static void empty_reserved(void)
 119 {
 120         while (num_reserved--)
 121                 kmem_cache_free(metapage_cache,
 122                                 reserved_metapages[num_reserved]);
 123 }
 124
 125 static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
 126 {
 127         struct metapage *new;
 128
 129         *dropped_lock = 0;
 130
 131         /*
 132          * Always try an atomic alloc first, to avoid dropping the
 133          * spinlock
 134          */
 135         new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
 136         if (new)
 137                 return new;
 138
 139         if (no_wait && num_reserved)
 140                 return reserved_metapages[--num_reserved];
 141
 142         *dropped_lock = 1;
 143         spin_unlock(&meta_lock);
 144         new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 145         spin_lock(&meta_lock);
 146         return new;
 147 }
 148
 149 static void __free_metapage(struct metapage *mp)
 150 {
 151         mp->flag = 0;
 152         set_bit(META_free, &mp->flag);
 153
 154         if (num_reserved < METAPOOL_MIN_PAGES)
 155                 reserved_metapages[num_reserved++] = mp;
 156         else
 157                 kmem_cache_free(metapage_cache, mp);
 158 }
 159
 160 static inline void free_metapage(struct metapage * mp)
 161 {
 162         spin_lock(&meta_lock);
 163         __free_metapage(mp);
 164         spin_unlock(&meta_lock);
 165 }
 166
 167 int __init metapage_init(void)
 168 {
 169         struct metapage *mp;
 170
 171         /*
 172          * Initialize wait queue
 173          */
 174         init_waitqueue_head(&meta_wait);
 175
 176         /*
 177          * Allocate the metapage structures
 178          */
 179         metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
 180                                            0, 0, init_once, NULL);
 181         if (metapage_cache == NULL)
 182                 return -ENOMEM;
 183
 184         while (num_reserved < METAPOOL_MIN_PAGES) {
 185                 mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 186                 if (mp)
 187                         reserved_metapages[num_reserved++] = mp;
 188                 else {
 189                         empty_reserved();
 190                         kmem_cache_destroy(metapage_cache);
 191                         return -ENOMEM;
 192                 }
 193         }
 194         /*
 195          * Now the hash list
 196          */
 197         for (hash_order = 0;
 198              ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
 199              hash_order++);
 200         hash_table =
 201             (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
 202         assert(hash_table);
 203         memset(hash_table, 0, PAGE_SIZE << hash_order);
 204
 205         return 0;
 206 }
 207
 208 void metapage_exit(void)
 209 {
 210         empty_reserved();
 211         kmem_cache_destroy(metapage_cache);
 212 }
 213
 214 /*
 215  * Basically same hash as in pagemap.h, but using our hash table
 216  */
 217 static struct metapage **meta_hash(struct address_space *mapping,
 218                                    unsigned long index)
 219 {
 220 #define i (((unsigned long)mapping)/ \
 221            (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
 222 #define s(x) ((x) + ((x) >> HASH_BITS))
 223         return hash_table + (s(i + index) & (HASH_SIZE - 1));
 224 #undef i
 225 #undef s
 226 }
 227
 228 static struct metapage *search_hash(struct metapage ** hash_ptr,
 229                                     struct address_space *mapping,
 230                                unsigned long index)
 231 {
 232         struct metapage *ptr;
 233
 234         for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
 235                 if ((ptr->mapping == mapping) && (ptr->index == index))
 236                         return ptr;
 237         }
 238
 239         return NULL;
 240 }
 241
 242 static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
 243 {
 244         if (*hash_ptr)
 245                 (*hash_ptr)->hash_prev = mp;
 246
 247         mp->hash_prev = NULL;
 248         mp->hash_next = *hash_ptr;
 249         *hash_ptr = mp;
 250 }
 251
 252 static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
 253 {
 254         if (mp->hash_prev)
 255                 mp->hash_prev->hash_next = mp->hash_next;
 256         else {
 257                 assert(*hash_ptr == mp);
 258                 *hash_ptr = mp->hash_next;
 259         }
 260
 261         if (mp->hash_next)
 262                 mp->hash_next->hash_prev = mp->hash_prev;
 263 }
 264
 265 struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 266                                 unsigned int size, int absolute,
 267                                 unsigned long new)
 268 {
 269         int dropped_lock;
 270         struct metapage **hash_ptr;
 271         int l2BlocksPerPage;
 272         int l2bsize;
 273         int no_wait;
 274         struct address_space *mapping;
 275         struct metapage *mp;
 276         unsigned long page_index;
 277         unsigned long page_offset;
 278
 279         jFYI(1, ("__get_metapage: inode = 0x%p, lblock = 0x%lx\n",
 280                  inode, lblock));
 281
 282         if (absolute)
 283                 mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
 284         else
 285                 mapping = inode->i_mapping;
 286
 287         spin_lock(&meta_lock);
 288
 289         hash_ptr = meta_hash(mapping, lblock);
 290
 291         mp = search_hash(hash_ptr, mapping, lblock);
 292         if (mp) {
 293               page_found:
 294                 if (test_bit(META_discard, &mp->flag)) {
 295                         assert(new);    /* It's okay to reuse a discarded
 296                                          * if we expect it to be empty
 297                                          */
 298                         clear_bit(META_discard, &mp->flag);
 299                 }
 300                 mp->count++;
 301                 jFYI(1, ("__get_metapage: found 0x%p, in hash\n", mp));
 302                 assert(mp->logical_size == size);
 303                 lock_metapage(mp);
 304                 spin_unlock(&meta_lock);
 305         } else {
 306                 l2bsize = inode->i_blkbits;
 307                 l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 308                 page_index = lblock >> l2BlocksPerPage;
 309                 page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
 310                     l2bsize;
 311                 if ((page_offset + size) > PAGE_CACHE_SIZE) {
 312                         spin_unlock(&meta_lock);
 313                         jERROR(1, ("MetaData crosses page boundary!!\n"));
 314                         return NULL;
 315                 }
 316
 317                 /*
 318                  * Locks held on aggregate inode pages are usually
 319                  * not held long, and they are taken in critical code
 320                  * paths (committing dirty inodes, txCommit thread)
 321                  *
 322                  * Attempt to get metapage without blocking, tapping into
 323                  * reserves if necessary.
 324                  */
 325                 if (JFS_IP(inode)->fileset == AGGREGATE_I)
 326                         no_wait = 1;
 327                 else
 328                         no_wait = 0;
 329
 330                 mp = alloc_metapage(&dropped_lock, no_wait);
 331                 if (dropped_lock) {
 332                         /* alloc_metapage blocked, we need to search the hash
 333                          * again.
 334                          */
 335                         struct metapage *mp2;
 336                         mp2 = search_hash(hash_ptr, mapping, lblock);
 337                         if (mp2) {
 338                                 __free_metapage(mp);
 339                                 mp = mp2;
 340                                 goto page_found;
 341                         }
 342                 }
 343                 mp->flag = 0;
 344                 lock_metapage(mp);
 345                 if (absolute)
 346                         set_bit(META_absolute, &mp->flag);
 347                 mp->xflag = COMMIT_PAGE;
 348                 mp->count = 1;
 349                 atomic_set(&mp->nohomeok,0);
 350                 mp->mapping = mapping;
 351                 mp->index = lblock;
 352                 mp->page = 0;
 353                 mp->logical_size = size;
 354                 add_to_hash(mp, hash_ptr);
 355                 spin_unlock(&meta_lock);
 356
 357                 if (new) {
 358                         jFYI(1,
 359                              ("__get_metapage: Calling grab_cache_page\n"));
 360                         mp->page = grab_cache_page(mapping, page_index);
 361                         if (!mp->page) {
 362                                 jERROR(1, ("grab_cache_page failed!\n"));
 363                                 goto freeit;
 364                         } else {
 365                                 INCREMENT(mpStat.pagealloc);
 366                                 UnlockPage(mp->page);
 367                         }
 368                 } else {
 369                         jFYI(1,
 370                              ("__get_metapage: Calling read_cache_page\n"));
 371                         mp->page = read_cache_page(mapping, lblock,
 372                                     (filler_t *)mapping->a_ops->readpage, NULL);
 373                         if (IS_ERR(mp->page)) {
 374                                 jERROR(1, ("read_cache_page failed!\n"));
 375                                 goto freeit;
 376                         } else
 377                                 INCREMENT(mpStat.pagealloc);
 378                 }
 379                 mp->data = kmap(mp->page) + page_offset;
 380         }
 381         jFYI(1, ("__get_metapage: returning = 0x%p\n", mp));
 382         return mp;
 383
 384 freeit:
 385         spin_lock(&meta_lock);
 386         remove_from_hash(mp, hash_ptr);
 387         __free_metapage(mp);
 388         spin_unlock(&meta_lock);
 389         return NULL;
 390 }
 391
 392 void hold_metapage(struct metapage * mp, int force)
 393 {
 394         spin_lock(&meta_lock);
 395
 396         mp->count++;
 397
 398         if (force) {
 399                 ASSERT (!(test_bit(META_forced, &mp->flag)));
 400                 if (trylock_metapage(mp))
 401                         set_bit(META_forced, &mp->flag);
 402         } else
 403                 lock_metapage(mp);
 404
 405         spin_unlock(&meta_lock);
 406 }
 407
 408 static void __write_metapage(struct metapage * mp)
 409 {
 410         int l2bsize = mp->mapping->host->i_blkbits;
 411         int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 412         unsigned long page_index;
 413         unsigned long page_offset;
 414         int rc;
 415
 416         jFYI(1, ("__write_metapage: mp = 0x%p\n", mp));
 417
 418         if (test_bit(META_discard, &mp->flag)) {
 419                 /*
 420                  * This metadata is no longer valid
 421                  */
 422                 clear_bit(META_dirty, &mp->flag);
 423                 return;
 424         }
 425
 426         page_index = mp->page->index;
 427         page_offset =
 428             (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;
 429
 430         lock_page(mp->page);
 431         rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
 432                                                page_offset +
 433                                                mp->logical_size);
 434         if (rc) {
 435                 jERROR(1, ("prepare_write return %d!\n", rc));
 436                 ClearPageUptodate(mp->page);
 437                 UnlockPage(mp->page);
 438                 kunmap(mp->page);
 439                 clear_bit(META_dirty, &mp->flag);
 440                 return;
 441         }
 442         rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
 443                                               page_offset +
 444                                               mp->logical_size);
 445         if (rc) {
 446                 jERROR(1, ("commit_write returned %d\n", rc));
 447         }
 448
 449         UnlockPage(mp->page);
 450         clear_bit(META_dirty, &mp->flag);
 451
 452         jFYI(1, ("__write_metapage done\n"));
 453 }
 454
 455 static inline void sync_metapage(struct metapage *mp)
 456 {
 457         struct page *page = mp->page;
 458
 459         page_cache_get(page);
 460         lock_page(page);
 461
 462         /* we're done with this page - no need to check for errors */
 463         if (page->buffers) {
 464                 writeout_one_page(page);
 465                 waitfor_one_page(page);
 466         }
 467
 468         UnlockPage(page);
 469         page_cache_release(page);
 470 }
 471
 472 void release_metapage(struct metapage * mp)
 473 {
 474         struct jfs_log *log;
 475
 476         jFYI(1,
 477              ("release_metapage: mp = 0x%p, flag = 0x%lx\n", mp,
 478               mp->flag));
 479
 480         spin_lock(&meta_lock);
 481         if (test_bit(META_forced, &mp->flag)) {
 482                 clear_bit(META_forced, &mp->flag);
 483                 mp->count--;
 484                 spin_unlock(&meta_lock);
 485                 return;
 486         }
 487
 488         assert(mp->count);
 489         if (--mp->count || atomic_read(&mp->nohomeok)) {
 490                 unlock_metapage(mp);
 491                 spin_unlock(&meta_lock);
 492         } else {
 493                 remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
 494                 spin_unlock(&meta_lock);
 495
 496                 if (mp->page) {
 497                         kunmap(mp->page);
 498                         mp->data = 0;
 499                         if (test_bit(META_dirty, &mp->flag))
 500                                 __write_metapage(mp);
 501                         if (test_bit(META_sync, &mp->flag)) {
 502                                 sync_metapage(mp);
 503                                 clear_bit(META_sync, &mp->flag);
 504                         }
 505
 506                         if (test_bit(META_discard, &mp->flag)) {
 507                                 lock_page(mp->page);
 508                                 block_flushpage(mp->page, 0);
 509                                 UnlockPage(mp->page);
 510                         }
 511
 512                         page_cache_release(mp->page);
 513                         INCREMENT(mpStat.pagefree);
 514                 }
 515
 516                 if (mp->lsn) {
 517                         /*
 518                          * Remove metapage from logsynclist.
 519                          */
 520                         log = mp->log;
 521                         LOGSYNC_LOCK(log);
 522                         mp->log = 0;
 523                         mp->lsn = 0;
 524                         mp->clsn = 0;
 525                         log->count--;
 526                         list_del(&mp->synclist);
 527                         LOGSYNC_UNLOCK(log);
 528                 }
 529
 530                 free_metapage(mp);
 531         }
 532         jFYI(1, ("release_metapage: done\n"));
 533 }
 534
 535 void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 536 {
 537         struct metapage **hash_ptr;
 538         unsigned long lblock;
 539         int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
 540         /* All callers are interested in block device's mapping */
 541         struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
 542         struct metapage *mp;
 543         struct page *page;
 544
 545         /*
 546          * First, mark metapages to discard.  They will eventually be
 547          * released, but should not be written.
 548          */
 549         for (lblock = addr; lblock < addr + len;
 550              lblock += 1 << l2BlocksPerPage) {
 551                 hash_ptr = meta_hash(mapping, lblock);
 552                 spin_lock(&meta_lock);
 553                 mp = search_hash(hash_ptr, mapping, lblock);
 554                 if (mp) {
 555                         set_bit(META_discard, &mp->flag);
 556                         spin_unlock(&meta_lock);
 557                         lock_page(mp->page);
 558                         block_flushpage(mp->page, 0);
 559                         UnlockPage(mp->page);
 560                 } else {
 561                         spin_unlock(&meta_lock);
 562                         page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
 563                         if (page) {
 564                                 block_flushpage(page, 0);
 565                                 UnlockPage(page);
 566                         }
 567                 }
 568         }
 569 }
 570
 571 #ifdef CONFIG_JFS_STATISTICS
 572 int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 573                     int *eof, void *data)
 574 {
 575         int len = 0;
 576         off_t begin;
 577
 578         len += sprintf(buffer,
 579                        "JFS Metapage statistics\n"
 580                        "=======================\n"
 581                        "page allocations = %d\n"
 582                        "page frees = %d\n"
 583                        "lock waits = %d\n"
 584                        "allocation waits = %d\n",
 585                        mpStat.pagealloc,
 586                        mpStat.pagefree,
 587                        mpStat.lockwait,
 588                        mpStat.allocwait);
 589
 590         begin = offset;
 591         *start = buffer + begin;
 592         len -= begin;
 593
 594         if (len > length)
 595                 len = length;
 596         else
 597                 *eof = 1;
 598
 599         if (len < 0)
 600                 len = 0;
 601
 602         return len;
 603 }
 604 #endif