fs/nfs/pagelist.c

   1 /*
   2  * linux/fs/nfs/pagelist.c
   3  *
   4  * A set of helper functions for managing NFS read and write requests.
   5  * The main purpose of these routines is to provide support for the
   6  * coalescing of several requests into a single RPC call.
   7  *
   8  * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
   9  *
  10  */
  11
  12 #include <linux/config.h>
  13 #include <linux/slab.h>
  14 #include <linux/file.h>
  15 #include <linux/sunrpc/clnt.h>
  16 #include <linux/nfs3.h>
  17 #include <linux/nfs_page.h>
  18 #include <linux/nfs_fs.h>
  19 #include <linux/nfs_flushd.h>
  20 #include <linux/nfs_mount.h>
  21
  22 #define NFS_PARANOIA 1
  23
/*
 * Spinlock serialising updates to request reference counts (wb_count)
 * and to the request and LRU lists.
 */
  27 spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED;
  28
  29 static kmem_cache_t *nfs_page_cachep;
  30
  31 static inline struct nfs_page *
  32 nfs_page_alloc(void)
  33 {
  34         struct nfs_page *p;
  35         p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS);
  36         if (p) {
  37                 memset(p, 0, sizeof(*p));
  38                 INIT_LIST_HEAD(&p->wb_hash);
  39                 INIT_LIST_HEAD(&p->wb_list);
  40                 INIT_LIST_HEAD(&p->wb_lru);
  41                 init_waitqueue_head(&p->wb_wait);
  42         }
  43         return p;
  44 }
  45
  46 static inline void
  47 nfs_page_free(struct nfs_page *p)
  48 {
  49         kmem_cache_free(nfs_page_cachep, p);
  50 }
  51
  52 static int nfs_try_to_free_pages(struct nfs_server *);
  53
  54 /**
  55  * nfs_create_request - Create an NFS read/write request.
  56  * @cred: RPC credential to use
  57  * @inode: inode to which the request is attached
  58  * @page: page to write
  59  * @offset: starting offset within the page for the write
  60  * @count: number of bytes to read/write
  61  *
  62  * The page must be locked by the caller. This makes sure we never
  63  * create two different requests for the same page, and avoids
  64  * a possible deadlock when we reach the hard limit on the number
  65  * of dirty pages.
  66  * User should ensure it is safe to sleep in this function.
  67  */
  68 struct nfs_page *
  69 nfs_create_request(struct rpc_cred *cred, struct inode *inode,
  70                    struct page *page,
  71                    unsigned int offset, unsigned int count)
  72 {
  73         struct nfs_server *server = NFS_SERVER(inode);
  74         struct nfs_reqlist      *cache = NFS_REQUESTLIST(inode);
  75         struct nfs_page         *req;
  76
  77         /* Deal with hard limits.  */
  78         for (;;) {
  79                 /* Prevent races by incrementing *before* we test */
  80                 atomic_inc(&cache->nr_requests);
  81
  82                 /* If we haven't reached the local hard limit yet,
  83                  * try to allocate the request struct */
  84                 if (atomic_read(&cache->nr_requests) <= MAX_REQUEST_HARD) {
  85                         req = nfs_page_alloc();
  86                         if (req != NULL)
  87                                 break;
  88                 }
  89
  90                 atomic_dec(&cache->nr_requests);
  91
  92                 /* Try to free up at least one request in order to stay
  93                  * below the hard limit
  94                  */
  95                 if (nfs_try_to_free_pages(server))
  96                         continue;
  97                 if (signalled() && (server->flags & NFS_MOUNT_INTR))
  98                         return ERR_PTR(-ERESTARTSYS);
  99                 yield();
 100         }
 101
 102         /* Initialize the request struct. Initially, we assume a
 103          * long write-back delay. This will be adjusted in
 104          * update_nfs_request below if the region is not locked. */
 105         req->wb_page    = page;
 106         page_cache_get(page);
 107         req->wb_offset  = offset;
 108         req->wb_bytes   = count;
 109
 110         if (cred)
 111                 req->wb_cred = get_rpccred(cred);
 112         req->wb_inode   = inode;
 113         req->wb_count   = 1;
 114
 115         return req;
 116 }
 117
 118 /**
 119  * nfs_clear_request - Free up all resources allocated to the request
 120  * @req:
 121  *
 122  * Release all resources associated with a write request after it
 123  * has completed.
 124  */
 125 void nfs_clear_request(struct nfs_page *req)
 126 {
 127         /* Release struct file or cached credential */
 128         if (req->wb_file) {
 129                 fput(req->wb_file);
 130                 req->wb_file = NULL;
 131         }
 132         if (req->wb_cred) {
 133                 put_rpccred(req->wb_cred);
 134                 req->wb_cred = NULL;
 135         }
 136         if (req->wb_page) {
 137                 atomic_dec(&NFS_REQUESTLIST(req->wb_inode)->nr_requests);
 138 #ifdef NFS_PARANOIA
 139                 BUG_ON(atomic_read(&NFS_REQUESTLIST(req->wb_inode)->nr_requests) < 0);
 140 #endif
 141                 page_cache_release(req->wb_page);
 142                 req->wb_page = NULL;
 143         }
 144 }
 145
 146
 147 /**
 148  * nfs_release_request - Release the count on an NFS read/write request
 149  * @req: request to release
 150  *
 151  * Note: Should never be called with the spinlock held!
 152  */
 153 void
 154 nfs_release_request(struct nfs_page *req)
 155 {
 156         spin_lock(&nfs_wreq_lock);
 157         if (--req->wb_count) {
 158                 spin_unlock(&nfs_wreq_lock);
 159                 return;
 160         }
 161         __nfs_del_lru(req);
 162         spin_unlock(&nfs_wreq_lock);
 163
 164 #ifdef NFS_PARANOIA
 165         BUG_ON(!list_empty(&req->wb_list));
 166         BUG_ON(!list_empty(&req->wb_hash));
 167         BUG_ON(NFS_WBACK_BUSY(req));
 168 #endif
 169
 170         /* Release struct file or cached credential */
 171         nfs_clear_request(req);
 172         nfs_page_free(req);
 173 }
 174
 175 /**
 176  * nfs_list_add_request - Insert a request into a sorted list
 177  * @req: request
 178  * @head: head of list into which to insert the request.
 179  *
 180  * Note that the wb_list is sorted by page index in order to facilitate
 181  * coalescing of requests.
 182  * We use an insertion sort that is optimized for the case of appended
 183  * writes.
 184  */
 185 void
 186 nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 187 {
 188         struct list_head *pos;
 189         unsigned long pg_idx = page_index(req->wb_page);
 190
 191 #ifdef NFS_PARANOIA
 192         if (!list_empty(&req->wb_list)) {
 193                 printk(KERN_ERR "NFS: Add to list failed!\n");
 194                 BUG();
 195         }
 196 #endif
 197         list_for_each_prev(pos, head) {
 198                 struct nfs_page *p = nfs_list_entry(pos);
 199                 if (page_index(p->wb_page) < pg_idx)
 200                         break;
 201         }
 202         list_add(&req->wb_list, pos);
 203         req->wb_list_head = head;
 204 }
 205
 206 /**
 207  * nfs_wait_on_request - Wait for a request to complete.
 208  * @req: request to wait upon.
 209  *
 210  * Interruptible by signals only if mounted with intr flag.
 211  * The user is responsible for holding a count on the request.
 212  */
 213 int
 214 nfs_wait_on_request(struct nfs_page *req)
 215 {
 216         struct inode    *inode = req->wb_inode;
 217         struct rpc_clnt *clnt = NFS_CLIENT(inode);
 218
 219         if (!NFS_WBACK_BUSY(req))
 220                 return 0;
 221         return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req));
 222 }
 223
 224 /**
 225  * nfs_coalesce_requests - Split coalesced requests out from a list.
 226  * @head: source list
 227  * @dst: destination list
 228  * @nmax: maximum number of requests to coalesce
 229  *
 230  * Moves a maximum of 'nmax' elements from one list to another.
 231  * The elements are checked to ensure that they form a contiguous set
 232  * of pages, and that they originated from the same file.
 233  */
 234 int
 235 nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 236                       unsigned int nmax)
 237 {
 238         struct nfs_page         *req = NULL;
 239         unsigned int            npages = 0;
 240
 241         while (!list_empty(head)) {
 242                 struct nfs_page *prev = req;
 243
 244                 req = nfs_list_entry(head->next);
 245                 if (prev) {
 246                         if (req->wb_cred != prev->wb_cred)
 247                                 break;
 248                         if (page_index(req->wb_page) != page_index(prev->wb_page)+1)
 249                                 break;
 250
 251                         if (req->wb_offset != 0)
 252                                 break;
 253                 }
 254                 nfs_list_remove_request(req);
 255                 nfs_list_add_request(req, dst);
 256                 npages++;
 257                 if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
 258                         break;
 259                 if (npages >= nmax)
 260                         break;
 261         }
 262         return npages;
 263 }
 264
 265 /*
 266  * nfs_scan_forward - Coalesce more requests
 267  * @req: First request to add
 268  * @dst: destination list
 269  * @nmax: maximum number of requests to coalesce
 270  *
 271  * Tries to coalesce more requests by traversing the request's wb_list.
 272  * Moves the resulting list into dst. Requests are guaranteed to be
 273  * contiguous, and to originate from the same file.
 274  */
 275 static int
 276 nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax)
 277 {
 278         struct nfs_server *server = NFS_SERVER(req->wb_inode);
 279         struct list_head *pos, *head = req->wb_list_head;
 280         struct rpc_cred *cred = req->wb_cred;
 281         unsigned long idx = page_index(req->wb_page) + 1;
 282         int npages = 0;
 283
 284         for (pos = req->wb_list.next; nfs_lock_request(req); pos = pos->next) {
 285                 nfs_list_remove_request(req);
 286                 nfs_list_add_request(req, dst);
 287                 __nfs_del_lru(req);
 288                 __nfs_add_lru(&server->lru_busy, req);
 289                 npages++;
 290                 if (npages == nmax)
 291                         break;
 292                 if (pos == head)
 293                         break;
 294                 if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
 295                         break;
 296                 req = nfs_list_entry(pos);
 297                 if (page_index(req->wb_page) != idx++)
 298                         break;
 299                 if (req->wb_offset != 0)
 300                         break;
 301                 if (req->wb_cred != cred)
 302                         break;
 303         }
 304         return npages;
 305 }
 306
 307 /**
 308  * nfs_scan_lru - Scan one of the least recently used list
 309  * @head: One of the NFS superblock lru lists
 310  * @dst: Destination list
 311  * @nmax: maximum number of requests to coalesce
 312  *
 313  * Scans one of the NFS superblock lru lists for upto nmax requests
 314  * and returns them on a list. The requests are all guaranteed to be
 315  * contiguous, originating from the same inode and the same file.
 316  */
 317 int
 318 nfs_scan_lru(struct list_head *head, struct list_head *dst, int nmax)
 319 {
 320         struct list_head *pos;
 321         struct nfs_page *req;
 322         int npages = 0;
 323
 324         list_for_each(pos, head) {
 325                 req = nfs_lru_entry(pos);
 326                 npages = nfs_scan_forward(req, dst, nmax);
 327                 if (npages)
 328                         break;
 329         }
 330         return npages;
 331 }
 332
 333 /**
 334  * nfs_scan_lru_timeout - Scan one of the superblock lru lists for timed out requests
 335  * @head: One of the NFS superblock lru lists
 336  * @dst: Destination list
 337  * @nmax: maximum number of requests to coalesce
 338  *
 339  * Scans one of the NFS superblock lru lists for upto nmax requests
 340  * and returns them on a list. The requests are all guaranteed to be
 341  * contiguous, originating from the same inode and the same file.
 342  * The first request on the destination list will be timed out, the
 343  * others are not guaranteed to be so.
 344  */
 345 int
 346 nfs_scan_lru_timeout(struct list_head *head, struct list_head *dst, int nmax)
 347 {
 348         struct list_head *pos;
 349         struct nfs_page *req;
 350         int npages = 0;
 351
 352         list_for_each(pos, head) {
 353                 req = nfs_lru_entry(pos);
 354                 if (time_after(req->wb_timeout, jiffies))
 355                         break;
 356                 npages = nfs_scan_forward(req, dst, nmax);
 357                 if (npages)
 358                         break;
 359         }
 360         return npages;
 361 }
 362
 363 /**
 364  * nfs_scan_list - Scan a list for matching requests
 365  * @head: One of the NFS inode request lists
 366  * @dst: Destination list
 367  * @idx_start: lower bound of page->index to scan
 368  * @npages: idx_start + npages sets the upper bound to scan.
 369  *
 370  * Moves elements from one of the inode request lists.
 371  * If the number of requests is set to 0, the entire address_space
 372  * starting at index idx_start, is scanned.
 373  * The requests are *not* checked to ensure that they form a contiguous set.
 374  * You must be holding the nfs_wreq_lock when calling this function
 375  */
 376 int
 377 nfs_scan_list(struct list_head *head, struct list_head *dst,
 378               unsigned long idx_start, unsigned int npages)
 379 {
 380         struct list_head        *pos, *tmp;
 381         struct nfs_page         *req;
 382         unsigned long           idx_end;
 383         int                     res;
 384
 385         res = 0;
 386         if (npages == 0)
 387                 idx_end = ~0;
 388         else
 389                 idx_end = idx_start + npages - 1;
 390
 391         list_for_each_safe(pos, tmp, head) {
 392                 unsigned long pg_idx;
 393
 394                 req = nfs_list_entry(pos);
 395
 396                 pg_idx = page_index(req->wb_page);
 397                 if (pg_idx < idx_start)
 398                         continue;
 399                 if (pg_idx > idx_end)
 400                         break;
 401
 402                 if (!nfs_lock_request(req))
 403                         continue;
 404                 nfs_list_remove_request(req);
 405                 nfs_list_add_request(req, dst);
 406                 __nfs_del_lru(req);
 407                 __nfs_add_lru(&NFS_SERVER(req->wb_inode)->lru_busy, req);
 408                 res++;
 409         }
 410         return res;
 411 }
 412
 413 /*
 414  * nfs_try_to_free_pages - Free up NFS read/write requests
 415  * @server: The NFS superblock
 416  *
 417  * This function attempts to flush out NFS reads and writes in order
 418  * to keep the hard limit on the total number of pending requests
 419  * on a given NFS partition.
 420  * Note: we first try to commit unstable writes, then flush out pending
 421  *       reads, then finally the dirty pages.
 422  *       The assumption is that this reflects the ordering from the fastest
 423  *       to the slowest method for reclaiming requests.
 424  */
 425 static int
 426 nfs_try_to_free_pages(struct nfs_server *server)
 427 {
 428         LIST_HEAD(head);
 429         struct nfs_page *req = NULL;
 430         int nreq;
 431
 432         for (;;) {
 433                 if (req) {
 434                         int status = nfs_wait_on_request(req);
 435                         nfs_release_request(req);
 436                         if (status)
 437                                 break;
 438                         req = NULL;
 439                 }
 440                 nreq = atomic_read(&server->rw_requests->nr_requests);
 441                 if (nreq < MAX_REQUEST_HARD)
 442                         return 1;
 443                 spin_lock(&nfs_wreq_lock);
 444                 /* Are there any busy RPC calls that might free up requests? */
 445                 if (!list_empty(&server->lru_busy)) {
 446                         req = nfs_lru_entry(server->lru_busy.next);
 447                         req->wb_count++;
 448                         __nfs_del_lru(req);
 449                         spin_unlock(&nfs_wreq_lock);
 450                         continue;
 451                 }
 452
 453 #ifdef CONFIG_NFS_V3
 454                 /* Let's try to free up some completed NFSv3 unstable writes */
 455                 nfs_scan_lru_commit(server, &head);
 456                 if (!list_empty(&head)) {
 457                         spin_unlock(&nfs_wreq_lock);
 458                         nfs_commit_list(&head, 0);
 459                         continue;
 460                 }
 461 #endif
 462                 /* OK, so we try to free up some pending readaheads */
 463                 nfs_scan_lru_read(server, &head);
 464                 if (!list_empty(&head)) {
 465                         spin_unlock(&nfs_wreq_lock);
 466                         nfs_pagein_list(&head, server->rpages);
 467                         continue;
 468                 }
 469                 /* Last resort: we try to flush out single requests */
 470                 nfs_scan_lru_dirty(server, &head);
 471                 if (!list_empty(&head)) {
 472                         spin_unlock(&nfs_wreq_lock);
 473                         nfs_flush_list(&head, server->wpages, FLUSH_STABLE);
 474                         continue;
 475                 }
 476                 spin_unlock(&nfs_wreq_lock);
 477                 break;
 478         }
 479         /* We failed to free up requests */
 480         return 0;
 481 }
 482
 483 int nfs_init_nfspagecache(void)
 484 {
 485         nfs_page_cachep = kmem_cache_create("nfs_page",
 486                                             sizeof(struct nfs_page),
 487                                             0, SLAB_HWCACHE_ALIGN,
 488                                             NULL, NULL);
 489         if (nfs_page_cachep == NULL)
 490                 return -ENOMEM;
 491
 492         return 0;
 493 }
 494
 495 void nfs_destroy_nfspagecache(void)
 496 {
 497         if (kmem_cache_destroy(nfs_page_cachep))
 498                 printk(KERN_INFO "nfs_page: not all structures were freed\n");
 499 }
 500