fs/jfs/jfs_metapage.c

   1 /*
   2  *   Copyright (C) International Business Machines Corp., 2000-2004
   3  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4  *
   5  *   This program is free software;  you can redistribute it and/or modify
   6  *   it under the terms of the GNU General Public License as published by
   7  *   the Free Software Foundation; either version 2 of the License, or
   8  *   (at your option) any later version.
   9  *
  10  *   This program is distributed in the hope that it will be useful,
  11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13  *   the GNU General Public License for more details.
  14  *
  15  *   You should have received a copy of the GNU General Public License
  16  *   along with this program;  if not, write to the Free Software
  17  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18  */
  19
  20 #include <linux/fs.h>
  21 #include <linux/init.h>
  22 #include "jfs_incore.h"
  23 #include "jfs_superblock.h"
  24 #include "jfs_filsys.h"
  25 #include "jfs_metapage.h"
  26 #include "jfs_txnmgr.h"
  27 #include "jfs_debug.h"
  28
  29 extern struct task_struct *jfsCommitTask;
  30 static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
  31 static wait_queue_head_t meta_wait;
  32
  33 #ifdef CONFIG_JFS_STATISTICS
  34 struct {
  35         uint    pagealloc;      /* # of page allocations */
  36         uint    pagefree;       /* # of page frees */
  37         uint    lockwait;       /* # of sleeping lock_metapage() calls */
  38         uint    allocwait;      /* # of sleeping alloc_metapage() calls */
  39 } mpStat;
  40 #endif
  41
  42
  43 #define HASH_BITS 10            /* This makes hash_table 1 4K page */
  44 #define HASH_SIZE (1 << HASH_BITS)
  45 static struct metapage **hash_table = NULL;
  46 static unsigned long hash_order;
  47
  48
  49 static inline int metapage_locked(struct metapage *mp)
  50 {
  51         return test_bit(META_locked, &mp->flag);
  52 }
  53
  54 static inline int trylock_metapage(struct metapage *mp)
  55 {
  56         return test_and_set_bit(META_locked, &mp->flag);
  57 }
  58
  59 static inline void unlock_metapage(struct metapage *mp)
  60 {
  61         clear_bit(META_locked, &mp->flag);
  62         wake_up(&mp->wait);
  63 }
  64
  65 static void __lock_metapage(struct metapage *mp)
  66 {
  67         DECLARE_WAITQUEUE(wait, current);
  68
  69         INCREMENT(mpStat.lockwait);
  70
  71         add_wait_queue_exclusive(&mp->wait, &wait);
  72         do {
  73                 set_current_state(TASK_UNINTERRUPTIBLE);
  74                 if (metapage_locked(mp)) {
  75                         spin_unlock(&meta_lock);
  76                         schedule();
  77                         spin_lock(&meta_lock);
  78                 }
  79         } while (trylock_metapage(mp));
  80         __set_current_state(TASK_RUNNING);
  81         remove_wait_queue(&mp->wait, &wait);
  82 }
  83
  84 /* needs meta_lock */
  85 static inline void lock_metapage(struct metapage *mp)
  86 {
  87         if (trylock_metapage(mp))
  88                 __lock_metapage(mp);
  89 }
  90
  91 /*
  92  * metapage pool is based on Linux 2.5's mempool
  93  *
  94  * Tap into reserved structures in critical paths where waiting on a
  95  * memory allocation could cause deadlock
  96  */
  97 #define METAPOOL_MIN_PAGES 32
  98 static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
  99 static int num_reserved = 0;
 100 kmem_cache_t *metapage_cache;
 101
 102 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 103 {
 104         struct metapage *mp = (struct metapage *)foo;
 105
 106         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 107             SLAB_CTOR_CONSTRUCTOR) {
 108                 mp->lid = 0;
 109                 mp->lsn = 0;
 110                 mp->flag = 0;
 111                 mp->data = NULL;
 112                 mp->clsn = 0;
 113                 mp->log = NULL;
 114                 set_bit(META_free, &mp->flag);
 115                 init_waitqueue_head(&mp->wait);
 116         }
 117 }
 118
 119 static void empty_reserved(void)
 120 {
 121         while (num_reserved--)
 122                 kmem_cache_free(metapage_cache,
 123                                 reserved_metapages[num_reserved]);
 124 }
 125
 126 static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
 127 {
 128         struct metapage *new;
 129
 130         *dropped_lock = 0;
 131
 132         /*
 133          * Always try an atomic alloc first, to avoid dropping the
 134          * spinlock
 135          */
 136         new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
 137         if (new)
 138                 return new;
 139
 140         if (no_wait && num_reserved)
 141                 return reserved_metapages[--num_reserved];
 142
 143         *dropped_lock = 1;
 144         spin_unlock(&meta_lock);
 145         new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 146         spin_lock(&meta_lock);
 147         return new;
 148 }
 149
 150 static void __free_metapage(struct metapage *mp)
 151 {
 152         mp->flag = 0;
 153         set_bit(META_free, &mp->flag);
 154
 155         if (num_reserved < METAPOOL_MIN_PAGES)
 156                 reserved_metapages[num_reserved++] = mp;
 157         else
 158                 kmem_cache_free(metapage_cache, mp);
 159 }
 160
 161 static inline void free_metapage(struct metapage * mp)
 162 {
 163         spin_lock(&meta_lock);
 164         __free_metapage(mp);
 165         spin_unlock(&meta_lock);
 166 }
 167
 168 int __init metapage_init(void)
 169 {
 170         struct metapage *mp;
 171
 172         /*
 173          * Initialize wait queue
 174          */
 175         init_waitqueue_head(&meta_wait);
 176
 177         /*
 178          * Allocate the metapage structures
 179          */
 180         metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
 181                                            0, 0, init_once, NULL);
 182         if (metapage_cache == NULL)
 183                 return -ENOMEM;
 184
 185         while (num_reserved < METAPOOL_MIN_PAGES) {
 186                 mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 187                 if (mp)
 188                         reserved_metapages[num_reserved++] = mp;
 189                 else {
 190                         empty_reserved();
 191                         kmem_cache_destroy(metapage_cache);
 192                         return -ENOMEM;
 193                 }
 194         }
 195         /*
 196          * Now the hash list
 197          */
 198         for (hash_order = 0;
 199              ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
 200              hash_order++);
 201         hash_table =
 202             (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
 203         assert(hash_table);
 204         memset(hash_table, 0, PAGE_SIZE << hash_order);
 205
 206         return 0;
 207 }
 208
 209 void metapage_exit(void)
 210 {
 211         empty_reserved();
 212         kmem_cache_destroy(metapage_cache);
 213 }
 214
 215 /*
 216  * Basically same hash as in pagemap.h, but using our hash table
 217  */
 218 static struct metapage **meta_hash(struct address_space *mapping,
 219                                    unsigned long index)
 220 {
 221 #define i (((unsigned long)mapping)/ \
 222            (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
 223 #define s(x) ((x) + ((x) >> HASH_BITS))
 224         return hash_table + (s(i + index) & (HASH_SIZE - 1));
 225 #undef i
 226 #undef s
 227 }
 228
 229 static struct metapage *search_hash(struct metapage ** hash_ptr,
 230                                     struct address_space *mapping,
 231                                unsigned long index)
 232 {
 233         struct metapage *ptr;
 234
 235         for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
 236                 if ((ptr->mapping == mapping) && (ptr->index == index))
 237                         return ptr;
 238         }
 239
 240         return NULL;
 241 }
 242
 243 static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
 244 {
 245         if (*hash_ptr)
 246                 (*hash_ptr)->hash_prev = mp;
 247
 248         mp->hash_prev = NULL;
 249         mp->hash_next = *hash_ptr;
 250         *hash_ptr = mp;
 251 }
 252
 253 static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
 254 {
 255         if (mp->hash_prev)
 256                 mp->hash_prev->hash_next = mp->hash_next;
 257         else {
 258                 assert(*hash_ptr == mp);
 259                 *hash_ptr = mp->hash_next;
 260         }
 261
 262         if (mp->hash_next)
 263                 mp->hash_next->hash_prev = mp->hash_prev;
 264 }
 265
 266 struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 267                                 unsigned int size, int absolute,
 268                                 unsigned long new)
 269 {
 270         int dropped_lock;
 271         struct metapage **hash_ptr;
 272         int l2BlocksPerPage;
 273         int l2bsize;
 274         int no_wait;
 275         struct address_space *mapping;
 276         struct metapage *mp;
 277         unsigned long page_index;
 278         unsigned long page_offset;
 279
 280         jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);
 281
 282         if (absolute)
 283                 mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
 284         else {
 285                 /*
 286                  * If an nfs client tries to read an inode that is larger
 287                  * than any existing inodes, we may try to read past the
 288                  * end of the inode map
 289                  */
 290                 if ((lblock << inode->i_blkbits) >= inode->i_size)
 291                         return NULL;
 292                 mapping = inode->i_mapping;
 293         }
 294
 295         hash_ptr = meta_hash(mapping, lblock);
 296 again:
 297         spin_lock(&meta_lock);
 298         mp = search_hash(hash_ptr, mapping, lblock);
 299         if (mp) {
 300               page_found:
 301                 if (test_bit(META_stale, &mp->flag)) {
 302                         spin_unlock(&meta_lock);
 303                         yield();
 304                         goto again;
 305                 }
 306                 mp->count++;
 307                 lock_metapage(mp);
 308                 spin_unlock(&meta_lock);
 309                 if (test_bit(META_discard, &mp->flag)) {
 310                         if (!new) {
 311                                 jfs_error(inode->i_sb,
 312                                           "__get_metapage: using a "
 313                                           "discarded metapage");
 314                                 release_metapage(mp);
 315                                 return NULL;
 316                         }
 317                         clear_bit(META_discard, &mp->flag);
 318                 }
 319                 jfs_info("__get_metapage: found 0x%p, in hash", mp);
 320                 if (mp->logical_size != size) {
 321                         jfs_error(inode->i_sb,
 322                                   "__get_metapage: mp->logical_size != size");
 323                         release_metapage(mp);
 324                         return NULL;
 325                 }
 326         } else {
 327                 l2bsize = inode->i_blkbits;
 328                 l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 329                 page_index = lblock >> l2BlocksPerPage;
 330                 page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
 331                     l2bsize;
 332                 if ((page_offset + size) > PAGE_CACHE_SIZE) {
 333                         spin_unlock(&meta_lock);
 334                         jfs_err("MetaData crosses page boundary!!");
 335                         return NULL;
 336                 }
 337
 338                 /*
 339                  * Locks held on aggregate inode pages are usually
 340                  * not held long, and they are taken in critical code
 341                  * paths (committing dirty inodes, txCommit thread)
 342                  *
 343                  * Attempt to get metapage without blocking, tapping into
 344                  * reserves if necessary.
 345                  */
 346                 if (JFS_IP(inode)->fileset == AGGREGATE_I)
 347                         no_wait = 1;
 348                 else
 349                         no_wait = 0;
 350
 351                 mp = alloc_metapage(&dropped_lock, no_wait);
 352                 if (!mp) {
 353                         spin_unlock(&meta_lock);
 354                         return NULL;
 355                 }
 356                 if (dropped_lock) {
 357                         /* alloc_metapage blocked, we need to search the hash
 358                          * again.
 359                          */
 360                         struct metapage *mp2;
 361                         mp2 = search_hash(hash_ptr, mapping, lblock);
 362                         if (mp2) {
 363                                 __free_metapage(mp);
 364                                 mp = mp2;
 365                                 goto page_found;
 366                         }
 367                 }
 368                 mp->flag = 0;
 369                 lock_metapage(mp);
 370                 if (absolute)
 371                         set_bit(META_absolute, &mp->flag);
 372                 mp->xflag = COMMIT_PAGE;
 373                 mp->count = 1;
 374                 atomic_set(&mp->nohomeok,0);
 375                 mp->mapping = mapping;
 376                 mp->index = lblock;
 377                 mp->page = 0;
 378                 mp->logical_size = size;
 379                 add_to_hash(mp, hash_ptr);
 380                 spin_unlock(&meta_lock);
 381
 382                 if (new) {
 383                         jfs_info("__get_metapage: Calling grab_cache_page");
 384                         mp->page = grab_cache_page(mapping, page_index);
 385                         if (!mp->page) {
 386                                 jfs_err("grab_cache_page failed!");
 387                                 goto freeit;
 388                         } else {
 389                                 INCREMENT(mpStat.pagealloc);
 390                                 UnlockPage(mp->page);
 391                         }
 392                 } else {
 393                         jfs_info("__get_metapage: Calling read_cache_page");
 394                         mp->page = read_cache_page(mapping, lblock,
 395                                     (filler_t *)mapping->a_ops->readpage, NULL);
 396                         if (IS_ERR(mp->page)) {
 397                                 jfs_err("read_cache_page failed!");
 398                                 goto freeit;
 399                         } else
 400                                 INCREMENT(mpStat.pagealloc);
 401                 }
 402                 mp->data = kmap(mp->page) + page_offset;
 403         }
 404
 405         if (new)
 406                 memset(mp->data, 0, PSIZE);
 407
 408         jfs_info("__get_metapage: returning = 0x%p", mp);
 409         return mp;
 410
 411 freeit:
 412         spin_lock(&meta_lock);
 413         remove_from_hash(mp, hash_ptr);
 414         __free_metapage(mp);
 415         spin_unlock(&meta_lock);
 416         return NULL;
 417 }
 418
 419 void hold_metapage(struct metapage * mp, int force)
 420 {
 421         spin_lock(&meta_lock);
 422
 423         mp->count++;
 424
 425         if (force) {
 426                 ASSERT (!(test_bit(META_forced, &mp->flag)));
 427                 if (trylock_metapage(mp))
 428                         set_bit(META_forced, &mp->flag);
 429         } else
 430                 lock_metapage(mp);
 431
 432         spin_unlock(&meta_lock);
 433 }
 434
 435 static void __write_metapage(struct metapage * mp)
 436 {
 437         int l2bsize = mp->mapping->host->i_blkbits;
 438         int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 439         unsigned long page_index;
 440         unsigned long page_offset;
 441         int rc;
 442
 443         jfs_info("__write_metapage: mp = 0x%p", mp);
 444
 445         if (test_bit(META_discard, &mp->flag)) {
 446                 /*
 447                  * This metadata is no longer valid
 448                  */
 449                 clear_bit(META_dirty, &mp->flag);
 450                 return;
 451         }
 452
 453         page_index = mp->page->index;
 454         page_offset =
 455             (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;
 456
 457         lock_page(mp->page);
 458         rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
 459                                                page_offset +
 460                                                mp->logical_size);
 461         if (rc) {
 462                 jfs_err("prepare_write return %d!", rc);
 463                 ClearPageUptodate(mp->page);
 464                 UnlockPage(mp->page);
 465                 kunmap(mp->page);
 466                 clear_bit(META_dirty, &mp->flag);
 467                 return;
 468         }
 469         rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
 470                                               page_offset +
 471                                               mp->logical_size);
 472         if (rc) {
 473                 jfs_err("commit_write returned %d", rc);
 474         }
 475
 476         UnlockPage(mp->page);
 477         clear_bit(META_dirty, &mp->flag);
 478
 479         jfs_info("__write_metapage done");
 480 }
 481
 482 static inline void sync_metapage(struct metapage *mp)
 483 {
 484         struct page *page = mp->page;
 485
 486         page_cache_get(page);
 487         lock_page(page);
 488
 489         /* we're done with this page - no need to check for errors */
 490         if (page->buffers) {
 491                 writeout_one_page(page);
 492                 waitfor_one_page(page);
 493         }
 494
 495         UnlockPage(page);
 496         page_cache_release(page);
 497 }
 498
 499 void release_metapage(struct metapage * mp)
 500 {
 501         struct jfs_log *log;
 502
 503         jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
 504
 505         spin_lock(&meta_lock);
 506         if (test_bit(META_forced, &mp->flag)) {
 507                 clear_bit(META_forced, &mp->flag);
 508                 mp->count--;
 509                 spin_unlock(&meta_lock);
 510                 return;
 511         }
 512
 513         assert(mp->count);
 514         if (--mp->count || atomic_read(&mp->nohomeok)) {
 515                 unlock_metapage(mp);
 516                 spin_unlock(&meta_lock);
 517                 return;
 518         }
 519
 520         if (mp->page) {
 521                 set_bit(META_stale, &mp->flag);
 522                 spin_unlock(&meta_lock);
 523                 kunmap(mp->page);
 524                 mp->data = 0;
 525                 if (test_bit(META_dirty, &mp->flag))
 526                         __write_metapage(mp);
 527                 if (test_bit(META_sync, &mp->flag)) {
 528                         sync_metapage(mp);
 529                         clear_bit(META_sync, &mp->flag);
 530                 }
 531
 532                 if (test_bit(META_discard, &mp->flag)) {
 533                         lock_page(mp->page);
 534                         block_flushpage(mp->page, 0);
 535                         UnlockPage(mp->page);
 536                 }
 537
 538                 page_cache_release(mp->page);
 539                 mp->page = NULL;
 540                 INCREMENT(mpStat.pagefree);
 541                 spin_lock(&meta_lock);
 542         }
 543
 544         if (mp->lsn) {
 545                 /*
 546                  * Remove metapage from logsynclist.
 547                  */
 548                 log = mp->log;
 549                 LOGSYNC_LOCK(log);
 550                 mp->log = 0;
 551                 mp->lsn = 0;
 552                 mp->clsn = 0;
 553                 log->count--;
 554                 list_del(&mp->synclist);
 555                 LOGSYNC_UNLOCK(log);
 556         }
 557         remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
 558         spin_unlock(&meta_lock);
 559
 560         free_metapage(mp);
 561 }
 562
 563 void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 564 {
 565         struct metapage **hash_ptr;
 566         unsigned long lblock;
 567         int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
 568         /* All callers are interested in block device's mapping */
 569         struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
 570         struct metapage *mp;
 571         struct page *page;
 572
 573         /*
 574          * First, mark metapages to discard.  They will eventually be
 575          * released, but should not be written.
 576          */
 577         for (lblock = addr; lblock < addr + len;
 578              lblock += 1 << l2BlocksPerPage) {
 579                 hash_ptr = meta_hash(mapping, lblock);
 580 again:
 581                 spin_lock(&meta_lock);
 582                 mp = search_hash(hash_ptr, mapping, lblock);
 583                 if (mp) {
 584                         if (test_bit(META_stale, &mp->flag)) {
 585                                 spin_unlock(&meta_lock);
 586                                 yield();
 587                                 goto again;
 588                         }
 589
 590                         set_bit(META_discard, &mp->flag);
 591                         spin_unlock(&meta_lock);
 592                 } else {
 593                         spin_unlock(&meta_lock);
 594                         page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
 595                         if (page) {
 596                                 block_flushpage(page, 0);
 597                                 UnlockPage(page);
 598                                 page_cache_release(page);
 599                         }
 600                 }
 601         }
 602 }
 603
 604 #ifdef CONFIG_JFS_STATISTICS
 605 int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 606                     int *eof, void *data)
 607 {
 608         int len = 0;
 609         off_t begin;
 610
 611         len += sprintf(buffer,
 612                        "JFS Metapage statistics\n"
 613                        "=======================\n"
 614                        "page allocations = %d\n"
 615                        "page frees = %d\n"
 616                        "lock waits = %d\n"
 617                        "allocation waits = %d\n",
 618                        mpStat.pagealloc,
 619                        mpStat.pagefree,
 620                        mpStat.lockwait,
 621                        mpStat.allocwait);
 622
 623         begin = offset;
 624         *start = buffer + begin;
 625         len -= begin;
 626
 627         if (len > length)
 628                 len = length;
 629         else
 630                 *eof = 1;
 631
 632         if (len < 0)
 633                 len = 0;
 634
 635         return len;
 636 }
 637 #endif