4 * Writing file data over NFS.
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following cases:
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/slab.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
/*
 * Debug facility selected for this file.
 * NOTE(review): presumably this is what gates the dprintk() calls used
 * throughout this file - confirm against the sunrpc debug headers.
 */
64 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
69 * This is the struct where the WRITE/COMMIT arguments go.
/*
 * Per-call state for one WRITE or COMMIT RPC. Instances come from
 * nfs_wdata_cachep (see nfs_writedata_alloc) and are handed to the RPC
 * task through task->tk_calldata.
 * NOTE(review): other code in this file also uses data->inode; that
 * member is not visible in the lines below - confirm the full layout.
 */
71 struct nfs_write_data {
/* Credential placed in msg.rpc_cred; taken from the first queued request. */
74 struct rpc_cred *cred;
75 struct nfs_writeargs args; /* argument struct */
76 struct nfs_writeres res; /* result struct */
/* Backing storage that res.fattr is pointed at by the rpcsetup helpers. */
77 struct nfs_fattr fattr;
/* Backing storage that res.verf is pointed at by the rpcsetup helpers. */
78 struct nfs_writeverf verf;
79 struct list_head pages; /* Coalesced requests we wish to flush */
/* Page pointer array; args.pages is set to point here at allocation. */
80 struct page *pagevec[NFS_WRITE_MAXIOV];
84 * Local function declarations
/*
 * Forward declarations for file-local helpers that are used before their
 * definitions. nfs_writeback_done and nfs_commit_done are the completion
 * callbacks passed to rpc_init_task() by nfs_flush_one and
 * nfs_commit_list respectively.
 */
86 static struct nfs_page * nfs_update_request(struct file*, struct inode *,
88 unsigned int, unsigned int);
89 static void nfs_strategy(struct inode *inode);
90 static void nfs_writeback_done(struct rpc_task *);
92 static void nfs_commit_done(struct rpc_task *);
95 /* Hack for future NFS swap support */
/*
 * As defined here the macro is constant 0, so the swap-specific branches
 * in nfs_writepage_sync (NFS_RW_SWAP flag, wsize clamp exemption) are
 * never taken.
 */
97 # define IS_SWAPFILE(inode) (0)
/*
 * Slab cache for struct nfs_write_data. Created by
 * nfs_init_writepagecache() and destroyed by nfs_destroy_writepagecache().
 */
100 static kmem_cache_t *nfs_wdata_cachep;
/*
 * Allocate one struct nfs_write_data from nfs_wdata_cachep using
 * SLAB_NOFS, zero it, initialise its request list and point args.pages
 * at the embedded pagevec array.
 * NOTE(review): the allocation-failure check and the return statement
 * are not visible here; callers should be assumed to receive NULL on
 * failure - confirm.
 */
102 static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
104 struct nfs_write_data *p;
105 p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS);
107 memset(p, 0, sizeof(*p));
108 INIT_LIST_HEAD(&p->pages);
109 p->args.pages = p->pagevec;
/*
 * Return a struct nfs_write_data to nfs_wdata_cachep. Counterpart of
 * nfs_writedata_alloc().
 */
114 static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
116 kmem_cache_free(nfs_wdata_cachep, p);
/*
 * RPC task release hook: frees the nfs_write_data stored in
 * task->tk_calldata. Installed as task->tk_release by nfs_flush_one and
 * nfs_commit_list.
 */
119 static void nfs_writedata_release(struct rpc_task *task)
121 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
122 nfs_writedata_free(wdata);
126 * This function will be used to simulate weak cache consistency
127 * under NFSv2 when the NFSv3 attribute patch is included.
128 * For the moment, we just call nfs_refresh_inode().
/*
 * Feed the attributes returned by a WRITE/COMMIT reply into the inode.
 *
 * If the reply carries file attributes (NFS_ATTR_FATTR) but no pre-op
 * data (NFS_ATTR_WCC is clear), the pre-op size, mtime and ctime are
 * filled in from the values currently cached on the inode and the WCC
 * flag is set, so the reply looks as if it had carried pre-op data.
 * The (possibly patched) fattr is then passed to nfs_refresh_inode().
 *
 * Returns whatever nfs_refresh_inode() returns.
 */
130 static __inline__ int
131 nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
133 if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
134 fattr->pre_size = NFS_CACHE_ISIZE(inode);
135 fattr->pre_mtime = NFS_CACHE_MTIME(inode);
136 fattr->pre_ctime = NFS_CACHE_CTIME(inode);
137 fattr->valid |= NFS_ATTR_WCC;
139 return nfs_refresh_inode(inode, fattr);
143 * Write a page synchronously.
144 * Offset is the data offset within the page.
/*
 * Synchronously write count bytes of page, starting at byte offset
 * within the page, using the protocol write operation with NFS_RW_SYNC.
 *
 * @file:   file the write belongs to (nfs_writepage passes NULL)
 * @inode:  inode being written
 * @page:   page holding the data
 * @offset: first byte within the page to write
 * @count:  number of bytes to write
 *
 * Each call to the protocol write op is followed by
 * nfs_write_attributes() on the returned attributes. i_size is raised
 * when the write position moves past it, and the page is marked not
 * uptodate on the error path.
 *
 * Returns the number of bytes written if that is non-zero, otherwise the
 * last result of the protocol write op.
 * NOTE(review): the loop construct, the pointer/counter advances and the
 * error labels are not visible here - confirm before changing anything.
 */
147 nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
148 unsigned int offset, unsigned int count)
150 struct rpc_cred *cred = NULL;
152 unsigned int wsize = NFS_SERVER(inode)->wsize;
153 int result, refresh = 0, written = 0, flags;
155 struct nfs_fattr fattr;
156 struct nfs_writeverf verf;
/*
 * NOTE(review): the condition choosing between the two credential
 * sources below is not visible. nfs_writepage calls this function with
 * file == NULL, so presumably that case takes the inode mm_cred - confirm.
 */
160 cred = get_rpccred(nfs_file_cred(file));
162 cred = get_rpccred(NFS_I(inode)->mm_cred);
164 dprintk("NFS: nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
165 inode->i_dev, (long long)NFS_FILEID(inode),
166 count, (long long)(page_offset(page) + offset));
168 base = page_offset(page) + offset;
170 flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
173 if (count < wsize && !IS_SWAPFILE(inode))
176 result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
177 offset, wsize, page, &verf);
178 nfs_write_attributes(inode, &fattr);
181 /* Must mark the page invalid after I/O error */
182 ClearPageUptodate(page);
186 printk("NFS: short write, wsize=%u, result=%d\n",
195 * If we've extended the file, update the inode
196 * now so we don't invalidate the cache.
198 if (base > inode->i_size)
199 inode->i_size = base;
203 ClearPageError(page);
209 return written? written : result;
/*
 * Queue an asynchronous write of count bytes at offset within page by
 * creating or extending a write request via nfs_update_request().
 *
 * An ERR_PTR result from nfs_update_request() is converted to a negative
 * status. On the path shown, the request credential is set from the
 * inode mm_cred and the request is unlocked, then the end position of
 * the write is compared with i_size.
 * NOTE(review): the guard around the wb_cred assignment, the i_size
 * update and the return statement are not visible here - confirm.
 */
213 nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
214 unsigned int offset, unsigned int count)
216 struct nfs_page *req;
220 req = nfs_update_request(file, inode, page, offset, count);
221 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
225 req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred);
226 nfs_unlock_request(req);
228 end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
229 if (inode->i_size < end)
237 * Write an mmapped page to the server.
/*
 * Write out one page of the mapping that owns it.
 *
 * The number of bytes to write starts as a full page. For a page whose
 * index is below i_size >> PAGE_CACHE_SHIFT the full page is used; for
 * later pages the length is reduced to i_size modulo the page size.
 * Pages entirely beyond i_size (index >= end_index + 1) or with a zero
 * remainder are rejected by the check that follows.
 *
 * The write goes through nfs_writepage_async() when the server wsize is
 * at least a page and the inode is not IS_SYNC, and through
 * nfs_writepage_sync() otherwise. Both are called with file == NULL.
 * NOTE(review): the branch targets, the value assigned to err on the
 * rejected path and the return statement are not visible here.
 */
240 nfs_writepage(struct page *page)
242 struct inode *inode = page->mapping->host;
243 unsigned long end_index;
244 unsigned offset = PAGE_CACHE_SIZE;
247 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
249 /* Ensure we've flushed out any previous writes */
250 nfs_wb_page(inode,page);
253 if (page->index < end_index)
255 /* things got complicated... */
256 offset = inode->i_size & (PAGE_CACHE_SIZE-1);
258 /* OK, are we completely out? */
260 if (page->index >= end_index+1 || !offset)
264 if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) {
265 err = nfs_writepage_async(NULL, inode, page, 0, offset);
269 err = nfs_writepage_sync(NULL, inode, page, 0, offset);
280 * Check whether the file range we want to write to is covered by a POSIX write lock held by the calling process.
/*
 * Test whether the byte range of req lies inside a POSIX write lock
 * owned by the current process.
 *
 * The range is [rqstart, rqend) where rqstart is the file offset of the
 * first request byte and rqend is rqstart plus the request length. Each
 * lock on inode->i_flock is examined; a lock matches when its owner is
 * current->files, it is flagged FL_POSIX, its type is F_WRLCK and it
 * spans the range.
 *
 * Mounts with NFS_MOUNT_NONLM are checked first and handled separately.
 * NOTE(review): the values returned on each path are not visible here;
 * nfs_update_request treats a non-zero result as locked.
 */
284 region_locked(struct inode *inode, struct nfs_page *req)
286 struct file_lock *fl;
287 loff_t rqstart, rqend;
289 /* Don't optimize writes if we don't use NLM */
290 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
293 rqstart = page_offset(req->wb_page) + req->wb_offset;
294 rqend = rqstart + req->wb_bytes;
295 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
296 if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
297 && fl->fl_type == F_WRLCK
298 && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
307 * Insert a write request into an inode
308 * Note: we sort the list in order to be able to optimize nfs_find_request()
309 * & co. for the 'write append' case. For 2.5 we may want to consider
310 * some form of hashing so as to perform well on random writes.
/*
 * Link req into the inode writeback list, which is kept ordered by page
 * index, and bump the inode npages counter.
 *
 * The list is walked from the tail towards the head looking for the
 * first entry whose page index is smaller than that of req; req is then
 * added directly after that position with list_add().
 *
 * A request that is already linked (wb_hash not empty) is detected up
 * front, and a request that is not marked busy triggers a KERN_ERR
 * message.
 *
 * No locking is done here; the visible caller (nfs_update_request)
 * invokes this with nfs_wreq_lock held.
 * NOTE(review): the actions taken for an already-hashed request and for
 * an empty list are not visible here - confirm.
 */
313 nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
315 struct list_head *pos, *head;
316 unsigned long pg_idx = page_index(req->wb_page);
318 if (!list_empty(&req->wb_hash))
320 if (!NFS_WBACK_BUSY(req))
321 printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
322 head = &inode->u.nfs_i.writeback;
323 if (list_empty(head))
325 list_for_each_prev(pos, head) {
326 struct nfs_page *entry = nfs_inode_wb_entry(pos);
327 if (page_index(entry->wb_page) < pg_idx)
330 inode->u.nfs_i.npages++;
331 list_add(&req->wb_hash, pos);
336 * Remove a write request from an inode
/*
 * Unlink req from its inode writeback list and drop the list reference.
 *
 * Under nfs_wreq_lock: a request that is not linked (wb_hash empty) is
 * left alone after releasing the lock. Otherwise the request is removed
 * from the list, its wb_hash is re-initialised so it reads as unlinked,
 * and the inode npages counter is decremented. A mismatch between
 * npages reaching zero and the list being empty is reported with
 * KERN_ERR, as is removing a request that is not marked busy.
 *
 * After the lock is released the request is cleared with
 * nfs_clear_request() and released with nfs_release_request().
 * NOTE(review): both branches of the final list_empty() test drop the
 * lock; anything else done in the empty-list branch is not visible here.
 */
339 nfs_inode_remove_request(struct nfs_page *req)
342 spin_lock(&nfs_wreq_lock);
343 if (list_empty(&req->wb_hash)) {
344 spin_unlock(&nfs_wreq_lock);
347 if (!NFS_WBACK_BUSY(req))
348 printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
349 inode = req->wb_inode;
350 list_del(&req->wb_hash);
351 INIT_LIST_HEAD(&req->wb_hash);
352 inode->u.nfs_i.npages--;
353 if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
354 printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
355 if (list_empty(&inode->u.nfs_i.writeback)) {
356 spin_unlock(&nfs_wreq_lock);
359 spin_unlock(&nfs_wreq_lock);
360 nfs_clear_request(req);
361 nfs_release_request(req);
/*
 * Look up the write request for page on the inode writeback list.
 *
 * The list is walked from the tail towards the head, comparing the page
 * index of each entry with that of page. Entries with a larger index
 * and entries with a different index are handled by the two tests below.
 *
 * Does no locking itself; both visible callers hold nfs_wreq_lock.
 * NOTE(review): the statements controlled by the two index tests and the
 * return path are not visible here. Callers release the result with
 * nfs_release_request(), which suggests a reference is taken on a match -
 * confirm.
 */
367 static inline struct nfs_page *
368 _nfs_find_request(struct inode *inode, struct page *page)
370 struct list_head *head, *pos;
371 unsigned long pg_idx = page_index(page);
373 head = &inode->u.nfs_i.writeback;
374 list_for_each_prev(pos, head) {
375 struct nfs_page *req = nfs_inode_wb_entry(pos);
376 unsigned long found_idx = page_index(req->wb_page);
378 if (pg_idx < found_idx)
380 if (pg_idx != found_idx)
/*
 * Locked wrapper around _nfs_find_request(): takes nfs_wreq_lock for the
 * duration of the lookup.
 * NOTE(review): the return statement is not visible here; presumably the
 * request found (or NULL) is returned - confirm.
 */
388 static struct nfs_page *
389 nfs_find_request(struct inode *inode, struct page *page)
391 struct nfs_page *req;
393 spin_lock(&nfs_wreq_lock);
394 req = _nfs_find_request(inode, page);
395 spin_unlock(&nfs_wreq_lock);
400 * Add a request to the inode's dirty list.
/*
 * Queue req for writing: under nfs_wreq_lock, add it to the dirty list
 * of its inode, increment the inode ndirty counter and add it to the
 * per-server dirty LRU. The inode is marked dirty after the lock is
 * dropped.
 */
403 nfs_mark_request_dirty(struct nfs_page *req)
405 struct inode *inode = req->wb_inode;
407 spin_lock(&nfs_wreq_lock);
408 nfs_list_add_request(req, &inode->u.nfs_i.dirty);
409 inode->u.nfs_i.ndirty++;
411 __nfs_add_lru(&NFS_SERVER(inode)->lru_dirty, req);
412 spin_unlock(&nfs_wreq_lock);
413 mark_inode_dirty(inode);
417 * Check if a request is dirty
/*
 * Returns non-zero when req is currently linked on a list (wb_list not
 * empty) and the list it records as its head is the dirty list of its
 * inode. No lock is taken here.
 */
420 nfs_dirty_request(struct nfs_page *req)
422 struct inode *inode = req->wb_inode;
423 return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
428 * Add a request to the inode's commit list.
/*
 * Queue req for a later COMMIT: under nfs_wreq_lock, add it to the
 * commit list of its inode, increment the inode ncommit counter and add
 * it to the per-server commit LRU. The inode is marked dirty after the
 * lock is dropped. Mirrors nfs_mark_request_dirty().
 */
431 nfs_mark_request_commit(struct nfs_page *req)
433 struct inode *inode = req->wb_inode;
435 spin_lock(&nfs_wreq_lock);
436 nfs_list_add_request(req, &inode->u.nfs_i.commit);
437 inode->u.nfs_i.ncommit++;
439 __nfs_add_lru(&NFS_SERVER(inode)->lru_commit, req);
440 spin_unlock(&nfs_wreq_lock);
441 mark_inode_dirty(inode);
446 * Wait for a request to complete.
448 * Interruptible by signals only if mounted with intr flag.
/*
 * Wait for busy write requests on inode within a page index window.
 *
 * @inode:     inode whose writeback list is scanned
 * @file:      if non-NULL, only requests with a matching wb_file count
 * @idx_start: first page index of the window
 * @npages:    window length in pages
 *
 * The writeback list is walked from the tail under nfs_wreq_lock. For a
 * request that passes the filters and is marked busy, the lock is
 * dropped, nfs_wait_on_request() is called and the request is released.
 *
 * NOTE(review): idx_end is computed as idx_start + npages - 1, which
 * wraps when npages is 0; a special case for npages == 0 may exist on a
 * line not visible here - confirm. The statements controlled by the
 * filters, the reference taken before waiting, the restart logic and the
 * return value are likewise not visible.
 */
451 nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages)
453 struct list_head *p, *head;
454 unsigned long idx_end;
455 unsigned int res = 0;
461 idx_end = idx_start + npages - 1;
463 head = &inode->u.nfs_i.writeback;
465 spin_lock(&nfs_wreq_lock);
466 list_for_each_prev(p, head) {
467 unsigned long pg_idx;
468 struct nfs_page *req = nfs_inode_wb_entry(p);
470 if (file && req->wb_file != file)
473 pg_idx = page_index(req->wb_page);
474 if (pg_idx < idx_start)
476 if (pg_idx > idx_end)
479 if (!NFS_WBACK_BUSY(req))
482 spin_unlock(&nfs_wreq_lock);
483 error = nfs_wait_on_request(req);
484 nfs_release_request(req);
490 spin_unlock(&nfs_wreq_lock);
495 * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests
496 * @server: NFS superblock data
497 * @dst: destination list
499 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
500 * The elements are checked to ensure that they form a contiguous set
501 * of pages, and that they originated from the same file.
/*
 * Pull up to server->wpages timed-out requests off the server dirty LRU
 * into dst, then charge them all against the ndirty counter of the inode
 * that owns the first request in dst.
 * NOTE(review): reading dst->next is only meaningful when at least one
 * request was moved; the guard on npages and the return value are not
 * visible here - confirm.
 */
504 nfs_scan_lru_dirty_timeout(struct nfs_server *server, struct list_head *dst)
509 npages = nfs_scan_lru_timeout(&server->lru_dirty, dst, server->wpages);
511 inode = nfs_list_entry(dst->next)->wb_inode;
512 inode->u.nfs_i.ndirty -= npages;
518 * nfs_scan_lru_dirty - Scan LRU list for dirty requests
519 * @server: NFS superblock data
520 * @dst: destination list
522 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
523 * The elements are checked to ensure that they form a contiguous set
524 * of pages, and that they originated from the same file.
/*
 * Same as nfs_scan_lru_dirty_timeout() but uses nfs_scan_lru(), i.e.
 * without the timeout filter: pull up to server->wpages requests off the
 * server dirty LRU into dst and subtract that many from the ndirty
 * counter of the inode owning the first request in dst.
 * NOTE(review): the guard on npages and the return value are not visible
 * here - confirm.
 */
527 nfs_scan_lru_dirty(struct nfs_server *server, struct list_head *dst)
532 npages = nfs_scan_lru(&server->lru_dirty, dst, server->wpages);
534 inode = nfs_list_entry(dst->next)->wb_inode;
535 inode->u.nfs_i.ndirty -= npages;
541 * nfs_scan_dirty - Scan an inode for dirty requests
542 * @inode: NFS inode to scan
543 * @dst: destination list
544 * @file: if set, ensure we match requests from this file
545 * @idx_start: lower bound of page->index to scan.
546 * @npages: idx_start + npages sets the upper bound to scan.
548 * Moves requests from the inode's dirty page list.
549 * The requests are *not* checked to ensure that they form a contiguous set.
/*
 * Move matching requests from the inode dirty list to dst via
 * nfs_scan_list() and subtract the number moved from the inode ndirty
 * counter. Afterwards a consistency check reports (KERN_ERR) if ndirty
 * being zero disagrees with the dirty list being empty.
 * The visible caller (nfs_flush_file) holds nfs_wreq_lock around this.
 * NOTE(review): the return statement is not visible; presumably the
 * count moved is returned - confirm.
 */
552 nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
555 res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, idx_start, npages);
556 inode->u.nfs_i.ndirty -= res;
557 if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
558 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
564 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
565 * @server: NFS superblock data
566 * @dst: destination list
568 * Finds the first timed out request in the NFS commit LRU list and moves it
569 * to the list dst. If such an element is found, we move all other commit
570 * requests that apply to the same inode.
571 * The assumption is that doing everything in a single commit-to-disk is
572 * the cheaper alternative.
/*
 * Take at most one timed-out request off the server commit LRU into dst,
 * then sweep every remaining request on the commit list of the same
 * inode into dst as well (file NULL, idx_start 0, npages 0). The total
 * moved is subtracted from the inode ncommit counter.
 * NOTE(review): the guard for the case where nothing was found and the
 * return value are not visible here - confirm.
 */
575 nfs_scan_lru_commit_timeout(struct nfs_server *server, struct list_head *dst)
580 npages = nfs_scan_lru_timeout(&server->lru_commit, dst, 1);
582 inode = nfs_list_entry(dst->next)->wb_inode;
583 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0);
584 inode->u.nfs_i.ncommit -= npages;
591 * nfs_scan_lru_commit - Scan LRU list for commit requests
592 * @server: NFS superblock data
593 * @dst: destination list
595 * Finds the first request in the NFS commit LRU list and moves it
596 * to the list dst. If such an element is found, we move all other commit
597 * requests that apply to the same inode.
598 * The assumption is that doing everything in a single commit-to-disk is
599 * the cheaper alternative.
/*
 * Same as nfs_scan_lru_commit_timeout() but uses nfs_scan_lru(), i.e.
 * without the timeout filter: take at most one request off the server
 * commit LRU, sweep the rest of the commit list of that inode into dst,
 * and subtract the total from the inode ncommit counter.
 * NOTE(review): the guard for the case where nothing was found and the
 * return value are not visible here - confirm.
 */
602 nfs_scan_lru_commit(struct nfs_server *server, struct list_head *dst)
607 npages = nfs_scan_lru(&server->lru_commit, dst, 1);
609 inode = nfs_list_entry(dst->next)->wb_inode;
610 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0);
611 inode->u.nfs_i.ncommit -= npages;
617 * nfs_scan_commit - Scan an inode for commit requests
618 * @inode: NFS inode to scan
619 * @dst: destination list
620 * @file: if set, ensure we collect requests from this file only.
621 * @idx_start: lower bound of page->index to scan.
622 * @npages: idx_start + npages sets the upper bound to scan.
624 * Moves requests from the inode's 'commit' request list.
625 * The requests are *not* checked to ensure that they form a contiguous set.
/*
 * Move matching requests from the inode commit list to dst via
 * nfs_scan_list() and subtract the number moved from the inode ncommit
 * counter. Afterwards a consistency check reports (KERN_ERR) if ncommit
 * being zero disagrees with the commit list being empty.
 * The visible caller (nfs_commit_file) holds nfs_wreq_lock around this.
 * NOTE(review): the return statement is not visible; presumably the
 * count moved is returned - confirm.
 */
628 nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
631 res = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, idx_start, npages);
632 inode->u.nfs_i.ncommit -= res;
633 if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
634 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
641 * Try to update any existing write request, or create one if there is none.
642 * In order to match, the request's credentials must match those of
643 * the calling process.
645 * Note: Should always be called with the Page Lock held!
/*
 * Find the write request covering page and extend it to include
 * [offset, offset + bytes), or create and register a new request.
 *
 * @file:   file the write belongs to
 * @inode:  inode being written
 * @page:   page being updated
 * @offset: first byte within the page
 * @bytes:  number of bytes
 *
 * Summary of the visible steps:
 *  - Under nfs_wreq_lock, look the page up with _nfs_find_request().
 *  - If an existing request cannot be locked, drop the spinlock, wait
 *    for it with nfs_wait_on_request(), release it, and return
 *    ERR_PTR(error) when the wait failed.
 *  - If a request was found and locked, drop the spinlock and release
 *    any speculative new request.
 *  - If none exists and a new one is available, lock it, hash it on the
 *    inode with nfs_inode_add_request(), drop the spinlock and mark it
 *    dirty.
 *  - Otherwise drop the spinlock and create a new request; its timeout
 *    is NFS_WRITEBACK_LOCKDELAY when region_locked() says the range is
 *    locked and NFS_WRITEBACK_DELAY otherwise.
 *  - An existing request is incompatible when it belongs to a different
 *    file or page, is not on the dirty list, or does not touch or
 *    overlap the new range; it is then unlocked and ERR_PTR(-EBUSY) is
 *    returned.
 *  - A compatible request has wb_offset lowered and/or wb_bytes raised
 *    so that it covers the union of the old and new ranges.
 *
 * NOTE(review): the enclosing retry loop, several conditionals, the
 * nfs_create_request() failure handling and the success return are not
 * visible here - treat the summary above as partial and confirm.
 */
647 static struct nfs_page *
648 nfs_update_request(struct file* file, struct inode *inode, struct page *page,
649 unsigned int offset, unsigned int bytes)
651 struct nfs_page *req, *new = NULL;
652 unsigned long rqend, end;
654 end = offset + bytes;
657 /* Loop over all inode entries and see if we find
658 * A request for the page we wish to update
660 spin_lock(&nfs_wreq_lock);
661 req = _nfs_find_request(inode, page);
663 if (!nfs_lock_request_dontget(req)) {
665 spin_unlock(&nfs_wreq_lock);
666 error = nfs_wait_on_request(req);
667 nfs_release_request(req);
669 return ERR_PTR(error);
672 spin_unlock(&nfs_wreq_lock);
674 nfs_release_request(new);
679 nfs_lock_request_dontget(new);
680 nfs_inode_add_request(inode, new);
681 spin_unlock(&nfs_wreq_lock);
682 nfs_mark_request_dirty(new);
685 spin_unlock(&nfs_wreq_lock);
687 new = nfs_create_request(nfs_file_cred(file), inode, page, offset, bytes);
694 /* If the region is locked, adjust the timeout */
695 if (region_locked(inode, new))
696 new->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
698 new->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
701 /* We have a request for our page.
702 * If the creds don't match, or the
703 * page addresses don't match,
704 * tell the caller to wait on the conflicting
707 rqend = req->wb_offset + req->wb_bytes;
708 if (req->wb_file != file
709 || req->wb_page != page
710 || !nfs_dirty_request(req)
711 || offset > rqend || end < req->wb_offset) {
712 nfs_unlock_request(req);
713 return ERR_PTR(-EBUSY);
716 /* Okay, the request matches. Update the region */
717 if (offset < req->wb_offset) {
718 req->wb_offset = offset;
719 req->wb_bytes = rqend - req->wb_offset;
723 req->wb_bytes = end - req->wb_offset;
729 * This is the strategy routine for NFS.
730 * It is called by nfs_updatepage whenever the user wrote up to the end of a page.
733 * We always try to submit a set of requests in parallel so that the
734 * server's write code can gather writes. This is mainly for the benefit of NFSv2 servers.
737 * We never submit more requests than we think the remote can handle.
738 * For UDP sockets, we make sure we don't exceed the congestion window;
739 * for TCP, we limit the number of requests to 8.
741 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
742 * should be sent out in one go. This is for the benefit of NFSv2 servers
743 * that perform write gathering.
745 * FIXME: Different servers may have different sweet spots.
746 * Record the average congestion window in server struct?
/* Multiplier applied to wpages to get the flush threshold used below. */
748 #define NFS_STRATEGY_PAGES 8
/*
 * Decide whether enough dirty requests have accumulated on inode to
 * start a flush of the whole file (nfs_flush_file with file NULL, range
 * 0/0 and how 0).
 *
 * First variant below: protocol version 2 flushes once ndirty reaches
 * NFS_STRATEGY_PAGES * wpages, other versions flush once ndirty reaches
 * wpages. Second variant: always uses the NFS_STRATEGY_PAGES * wpages
 * threshold.
 * NOTE(review): the two variants are presumably selected by a
 * preprocessor conditional (CONFIG_NFS_V3 is used elsewhere in this
 * file) whose directive lines are not visible here - confirm.
 */
750 nfs_strategy(struct inode *inode)
752 unsigned int dirty, wpages;
754 dirty = inode->u.nfs_i.ndirty;
755 wpages = NFS_SERVER(inode)->wpages;
757 if (NFS_PROTO(inode)->version == 2) {
758 if (dirty >= NFS_STRATEGY_PAGES * wpages)
759 nfs_flush_file(inode, NULL, 0, 0, 0);
760 } else if (dirty >= wpages)
761 nfs_flush_file(inode, NULL, 0, 0, 0);
763 if (dirty >= NFS_STRATEGY_PAGES * wpages)
764 nfs_flush_file(inode, NULL, 0, 0, 0);
/*
 * Before new data is copied into page, flush any pending write request
 * on it that could not be merged with a write from file.
 *
 * The existing request is looked up with nfs_find_request(). It is
 * considered incompatible when its file, its credential or its page
 * pointer differs from the ones in use now; in that case the page is
 * written back with nfs_wb_page(). The request reference is then
 * released.
 *
 * Returns the negative status from nfs_wb_page() on failure, 0 otherwise.
 * NOTE(review): the NULL check on req and the initial value of status
 * are not visible here - confirm.
 */
769 nfs_flush_incompatible(struct file *file, struct page *page)
771 struct rpc_cred *cred = nfs_file_cred(file);
772 struct inode *inode = page->mapping->host;
773 struct nfs_page *req;
776 * Look for a request corresponding to this page. If there
777 * is one, and it belongs to another file, we flush it out
778 * before we try to copy anything into the page. Do this
779 * due to the lack of an ACCESS-type call in NFSv2.
780 * Also do the same if we find a request from an existing
783 req = nfs_find_request(inode,page);
785 if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page)
786 status = nfs_wb_page(inode, page);
787 nfs_release_request(req);
789 return (status < 0) ? status : 0;
793 * Update and possibly write a cached page of an NFS file.
795 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
796 * things with a page scheduled for an RPC call (e.g. invalidate it).
/*
 * Record that count bytes at offset within page were modified through
 * file, and arrange for them to be written to the server.
 *
 * @file:   file the data was written through
 * @page:   page cache page that was modified
 * @offset: first modified byte within the page
 * @count:  number of modified bytes
 *
 * If the server wsize is smaller than a page, or the inode is IS_SYNC,
 * the data is written immediately with nfs_writepage_sync() and its
 * result returned.
 *
 * Otherwise nfs_update_request() is called in a loop: while it reports
 * -EBUSY the conflicting request is flushed with nfs_wb_page() and the
 * update is retried, for as long as the flush succeeds.
 *
 * On the visible success path the end of the write is compared with
 * i_size, and a request that now covers the entire page causes the page
 * to be marked uptodate before the request is unlocked. The page is
 * marked not uptodate on the error path at the end.
 * NOTE(review): the loop header, the i_size update, the call to the
 * strategy routine, the error test before ClearPageUptodate and the
 * return statement are not visible here - confirm.
 */
799 nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count)
801 struct dentry *dentry = file->f_dentry;
802 struct inode *inode = page->mapping->host;
803 struct nfs_page *req;
807 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
808 dentry->d_parent->d_name.name, dentry->d_name.name,
809 count, (long long)(page_offset(page) +offset));
812 * If wsize is smaller than page size, update and write
813 * page synchronously.
815 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode))
816 return nfs_writepage_sync(file, inode, page, offset, count);
819 * Try to find an NFS request corresponding to this page
821 * If the existing request cannot be updated, we must flush
825 req = nfs_update_request(file, inode, page, offset, count);
826 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
827 if (status != -EBUSY)
829 /* Request could not be updated. Flush it out and try again */
830 status = nfs_wb_page(inode, page);
831 } while (status >= 0);
836 end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
837 if (inode->i_size < end)
840 /* If we wrote past the end of the page.
841 * Call the strategy routine so it can send out a bunch
844 if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) {
845 SetPageUptodate(page);
846 nfs_unlock_request(req);
849 nfs_unlock_request(req);
851 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
852 status, (long long)inode->i_size);
854 ClearPageUptodate(page);
859 * Set up the argument/result storage required for the RPC call.
/*
 * Fill in the WRITE argument and result structures in data from the
 * requests queued on head.
 *
 * @head: list of requests to send; emptied by this function
 * @data: call state to populate
 *
 * Every request is moved from head to data->pages, its page pointer is
 * appended to the args.pages array and its byte count is added to the
 * running total. The file handle, credential, inode, starting file
 * offset and page base are all taken from the first request on
 * data->pages.
 * NOTE(review): the declarations and initial values of pages and count
 * are not visible here. Nothing visible bounds the number of requests by
 * NFS_WRITE_MAXIOV, so the caller is presumably responsible - confirm.
 */
862 nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
864 struct nfs_page *req;
868 /* Set up the RPC argument and reply structs
869 * NB: take care not to mess about with data->commit et al. */
871 pages = data->args.pages;
/* The req declared inside this loop shadows the function-level req. */
873 while (!list_empty(head)) {
874 struct nfs_page *req = nfs_list_entry(head->next);
875 nfs_list_remove_request(req);
876 nfs_list_add_request(req, &data->pages);
877 *pages++ = req->wb_page;
878 count += req->wb_bytes;
/* From here on req is the first request of the coalesced run. */
880 req = nfs_list_entry(data->pages.next);
881 data->inode = req->wb_inode;
882 data->cred = req->wb_cred;
883 data->args.fh = NFS_FH(req->wb_inode);
884 data->args.offset = page_offset(req->wb_page) + req->wb_offset;
885 data->args.pgbase = req->wb_offset;
886 data->args.count = count;
887 data->res.fattr = &data->fattr;
888 data->res.count = count;
889 data->res.verf = &data->verf;
894 * Create an RPC task for the given write request and kick it.
895 * The page must have been locked by the caller.
897 * It may happen that the page we're passed is not marked dirty.
898 * This is the case if nfs_updatepage detects a conflicting request
899 * that has been written but not committed.
/*
 * Build and start one WRITE RPC for the requests on head.
 *
 * @head:  coalesced list of requests belonging to inode
 * @inode: inode the requests belong to
 * @how:   FLUSH_* flags; FLUSH_SYNC selects a synchronous task and
 *         FLUSH_STABLE requests a stable write
 *
 * An nfs_write_data is allocated, filled by nfs_write_rpcsetup(), and
 * attached to the task as tk_calldata with nfs_writedata_release as the
 * release hook and nfs_writeback_done as the completion callback. The
 * call is set up with signals masked via rpc_clnt_sigmask().
 *
 * The loop at the end puts every request still on head back on the
 * dirty list and unlocks it.
 * NOTE(review): the allocation-failure test, the source of task, the
 * statement that actually runs the task, the label in front of the final
 * loop and the return values are not visible here - confirm.
 */
902 nfs_flush_one(struct list_head *head, struct inode *inode, int how)
904 struct rpc_clnt *clnt = NFS_CLIENT(inode);
905 struct nfs_write_data *data;
906 struct rpc_task *task;
907 struct rpc_message msg;
909 nfsvers = NFS_PROTO(inode)->version,
910 async = !(how & FLUSH_SYNC),
911 stable = (how & FLUSH_STABLE);
915 data = nfs_writedata_alloc();
920 /* Set the initial flags for the task. */
921 flags = (async) ? RPC_TASK_ASYNC : 0;
923 /* Set up the argument struct */
924 nfs_write_rpcsetup(head, data);
/*
 * Stability level for the WRITE. NOTE(review): the conditions selecting
 * among the assignments below (presumably on nfsvers and stable) are not
 * visible here. The one visible test picks NFS_FILE_SYNC when the inode
 * has no requests awaiting commit.
 */
926 data->args.stable = NFS_FILE_SYNC;
928 if (!inode->u.nfs_i.ncommit)
929 data->args.stable = NFS_FILE_SYNC;
931 data->args.stable = NFS_DATA_SYNC;
933 data->args.stable = NFS_UNSTABLE;
935 /* Finalize the task. */
936 rpc_init_task(task, clnt, nfs_writeback_done, flags);
937 task->tk_calldata = data;
938 /* Release requests */
939 task->tk_release = nfs_writedata_release;
/*
 * NOTE(review): two alternative rpc_proc assignments follow; presumably
 * a preprocessor conditional not visible here selects one - confirm.
 */
942 msg.rpc_proc = (nfsvers == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
944 msg.rpc_proc = NFSPROC_WRITE;
946 msg.rpc_argp = &data->args;
947 msg.rpc_resp = &data->res;
948 msg.rpc_cred = data->cred;
950 dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n",
953 (long long)NFS_FILEID(inode),
956 rpc_clnt_sigmask(clnt, &oldset);
957 rpc_call_setup(task, &msg, 0);
961 rpc_clnt_sigunmask(clnt, &oldset);
/* Requeue: return every unsent request to the dirty list and unlock it. */
964 while (!list_empty(head)) {
965 struct nfs_page *req = nfs_list_entry(head->next);
966 nfs_list_remove_request(req);
967 nfs_mark_request_dirty(req);
968 nfs_unlock_request(req);
/*
 * Send all requests on head, one coalesced WRITE at a time.
 *
 * @head:   list of requests to flush
 * @wpages: maximum number of requests to coalesce per call
 * @how:    FLUSH_* flags passed through to nfs_flush_one()
 *
 * Each pass moves a run of up to wpages requests into one_request with
 * nfs_coalesce_requests() and hands it to nfs_flush_one(), using the
 * inode of the first request in the run.
 *
 * The second loop puts every request still on head back on the dirty
 * list and unlocks it.
 * NOTE(review): the error test that leaves the first loop and the return
 * values are not visible here - confirm.
 */
974 nfs_flush_list(struct list_head *head, int wpages, int how)
976 LIST_HEAD(one_request);
977 struct nfs_page *req;
979 unsigned int pages = 0;
981 while (!list_empty(head)) {
982 pages += nfs_coalesce_requests(head, &one_request, wpages);
983 req = nfs_list_entry(one_request.next);
984 error = nfs_flush_one(&one_request, req->wb_inode, how);
991 while (!list_empty(head)) {
992 req = nfs_list_entry(head->next);
993 nfs_list_remove_request(req);
994 nfs_mark_request_dirty(req);
995 nfs_unlock_request(req);
1002 * This function is called when the WRITE call is complete.
/*
 * Completion callback for the WRITE task started by nfs_flush_one().
 *
 * @task: finished RPC task; tk_calldata is the nfs_write_data
 *
 * Visible steps:
 *  - nfs_async_handle_jukebox() is consulted first.
 *  - A reply that reports fewer bytes than requested, with a
 *    non-negative status, is converted to -EIO; the accompanying message
 *    is limited to once per 300 seconds by a static timestamp.
 *  - With CONFIG_NFS_V3, a reply whose committed level is lower than the
 *    requested stability is reported through dprintk, with the same
 *    300 second limit.
 *  - The returned attributes are applied with nfs_write_attributes().
 *  - Each request on data->pages is taken off the list. On error the
 *    page is marked not uptodate, the error is stored in the f_error of
 *    the owning file and the request is removed from the inode. On
 *    success the request is removed from the inode when the write was
 *    stable or the server reported NFS_FILE_SYNC; otherwise the verifier
 *    is copied into the request, its timeout is set to
 *    jiffies + NFS_COMMIT_DELAY and it is queued for commit. Requests
 *    are unlocked at the end of the iteration.
 * NOTE(review): several branch bodies, guards (for example the one
 * around the f_error store), preprocessor lines and jump targets are
 * not visible here - confirm before relying on this summary.
 */
1005 nfs_writeback_done(struct rpc_task *task)
1007 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
1008 struct nfs_writeargs *argp = &data->args;
1009 struct nfs_writeres *resp = &data->res;
1010 struct inode *inode = data->inode;
1011 struct nfs_page *req;
1014 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1015 task->tk_pid, task->tk_status);
1017 if (nfs_async_handle_jukebox(task))
1020 /* We can't handle that yet but we check for it nevertheless */
1021 if (resp->count < argp->count && task->tk_status >= 0) {
1022 static unsigned long complain;
1023 if (time_before(complain, jiffies)) {
1025 "NFS: Server wrote less than requested.\n");
1026 complain = jiffies + 300 * HZ;
1028 /* Can't do anything about it right now except throw
1030 task->tk_status = -EIO;
1032 #ifdef CONFIG_NFS_V3
1033 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1034 /* We tried a write call, but the server did not
1035 * commit data to stable storage even though we
1037 * Note: There is a known bug in Tru64 < 5.0 in which
1038 * the server reports NFS_DATA_SYNC, but performs
1039 * NFS_FILE_SYNC. We therefore implement this checking
1040 * as a dprintk() in order to avoid filling syslog.
1042 static unsigned long complain;
1044 if (time_before(complain, jiffies)) {
1045 dprintk("NFS: faulty NFSv3 server %s:"
1046 " (committed = %d) != (stable = %d)\n",
1047 NFS_SERVER(inode)->hostname,
1048 resp->verf->committed, argp->stable);
1049 complain = jiffies + 300 * HZ;
1055 * Update attributes as result of writeback.
1056 * FIXME: There is an inherent race with invalidate_inode_pages and
1057 * writebacks since the page->count is kept > 1 for as long
1058 * as the page has a write request pending.
1060 nfs_write_attributes(inode, resp->fattr);
1061 while (!list_empty(&data->pages)) {
1062 req = nfs_list_entry(data->pages.next);
1063 nfs_list_remove_request(req);
1064 page = req->wb_page;
1066 dprintk("NFS: write (%x/%Ld %d@%Ld)",
1067 req->wb_inode->i_dev,
1068 (long long)NFS_FILEID(req->wb_inode),
1070 (long long)(page_offset(page) + req->wb_offset));
1072 if (task->tk_status < 0) {
1073 ClearPageUptodate(page);
1076 req->wb_file->f_error = task->tk_status;
1077 nfs_inode_remove_request(req);
1078 dprintk(", error = %d\n", task->tk_status);
1082 #ifdef CONFIG_NFS_V3
1083 if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) {
1084 nfs_inode_remove_request(req);
1088 memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
1089 req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
1090 nfs_mark_request_commit(req);
1091 dprintk(" marked for commit\n");
1093 nfs_inode_remove_request(req);
1096 nfs_unlock_request(req);
1101 #ifdef CONFIG_NFS_V3
1103 * Set up the argument/result storage required for the RPC call.
/*
 * Fill in the COMMIT argument and result structures in data from the
 * requests queued on head.
 *
 * @head: list of requests to commit; spliced onto data->pages and then
 *        re-initialised as an empty list
 * @data: call state to populate
 *
 * The byte range is derived from the first and last entries of
 * data->pages, so the list must be ordered by file position. The length
 * is replaced by another value when the range reaches or passes i_size,
 * is negative, or exceeds 0x7fffffff (the upper bound written as
 * ~((u32)0) >> 1).
 * NOTE(review): the statement computing len and the statement executed
 * when the range test fires (per the comment, passing 0) are not visible
 * here - confirm.
 */
1106 nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
1108 struct nfs_page *first, *last;
1109 struct inode *inode;
1110 loff_t start, end, len;
1112 /* Set up the RPC argument and reply structs
1113 * NB: take care not to mess about with data->commit et al. */
1115 list_splice(head, &data->pages);
1116 INIT_LIST_HEAD(head);
1117 first = nfs_list_entry(data->pages.next);
1118 last = nfs_list_entry(data->pages.prev);
1119 inode = first->wb_inode;
1122 * Determine the offset range of requests in the COMMIT call.
1123 * We rely on the fact that data->pages is an ordered list...
1125 start = page_offset(first->wb_page) + first->wb_offset;
1126 end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes);
1128 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1129 if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
1132 data->inode = inode;
1133 data->cred = first->wb_cred;
1134 data->args.fh = NFS_FH(inode);
1135 data->args.offset = start;
1136 data->res.count = data->args.count = (u32)len;
1137 data->res.fattr = &data->fattr;
1138 data->res.verf = &data->verf;
1142 * Commit dirty pages
/*
 * Build and start one NFSv3 COMMIT RPC covering the requests on head.
 *
 * @head: list of requests awaiting commit
 * @how:  FLUSH_* flags; FLUSH_SYNC selects a synchronous task
 *
 * An nfs_write_data is allocated and filled by nfs_commit_rpcsetup().
 * The RPC client is taken from the inode of the first request. The task
 * gets nfs_commit_done as completion callback and nfs_writedata_release
 * as release hook, and is set up with signals masked.
 *
 * The loop at the end puts every request still on head back on the
 * commit list and unlocks it.
 * NOTE(review): the allocation-failure test, the source of task, the
 * statement that actually runs the task, the label in front of the final
 * loop and the return values are not visible here - confirm.
 */
1145 nfs_commit_list(struct list_head *head, int how)
1147 struct rpc_message msg;
1148 struct rpc_clnt *clnt;
1149 struct nfs_write_data *data;
1150 struct rpc_task *task;
1151 struct nfs_page *req;
1153 async = !(how & FLUSH_SYNC);
1156 data = nfs_writedata_alloc();
1162 flags = (async) ? RPC_TASK_ASYNC : 0;
1164 /* Set up the argument struct */
1165 nfs_commit_rpcsetup(head, data);
1166 req = nfs_list_entry(data->pages.next);
1167 clnt = NFS_CLIENT(req->wb_inode);
1169 rpc_init_task(task, clnt, nfs_commit_done, flags);
1170 task->tk_calldata = data;
1171 /* Release requests */
1172 task->tk_release = nfs_writedata_release;
1174 msg.rpc_proc = NFS3PROC_COMMIT;
1175 msg.rpc_argp = &data->args;
1176 msg.rpc_resp = &data->res;
1177 msg.rpc_cred = data->cred;
1179 dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
1180 rpc_clnt_sigmask(clnt, &oldset);
1181 rpc_call_setup(task, &msg, 0);
1185 rpc_clnt_sigunmask(clnt, &oldset);
/* Requeue: return every unsent request to the commit list and unlock it. */
1188 while (!list_empty(head)) {
1189 req = nfs_list_entry(head->next);
1190 nfs_list_remove_request(req);
1191 nfs_mark_request_commit(req);
1192 nfs_unlock_request(req);
1198 * COMMIT call returned
/*
 * Completion callback for the COMMIT task started by nfs_commit_list().
 *
 * @task: finished RPC task; tk_calldata is the nfs_write_data
 *
 * After the jukebox check and the attribute update, each request on
 * data->pages is taken off the list and handled as follows:
 *  - On error the status is stored in the f_error of the owning file and
 *    the request is removed from the inode.
 *  - On success the verifier saved in the request at WRITE time is
 *    compared with the one returned by COMMIT. Equal verifiers mean the
 *    request is finished and it is removed from the inode. A mismatch
 *    puts the request back on the dirty list so it is written again.
 * Requests are unlocked at the end of the iteration.
 * NOTE(review): guards (for example the one around the f_error store)
 * and jump targets inside the loop are not visible here - confirm.
 */
1201 nfs_commit_done(struct rpc_task *task)
1203 struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
1204 struct nfs_writeres *resp = &data->res;
1205 struct nfs_page *req;
1206 struct inode *inode = data->inode;
1208 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1209 task->tk_pid, task->tk_status);
1211 if (nfs_async_handle_jukebox(task))
1214 nfs_write_attributes(inode, resp->fattr);
1215 while (!list_empty(&data->pages)) {
1216 req = nfs_list_entry(data->pages.next);
1217 nfs_list_remove_request(req);
1219 dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1220 req->wb_inode->i_dev,
1221 (long long)NFS_FILEID(req->wb_inode),
1223 (long long)(page_offset(req->wb_page) + req->wb_offset));
1224 if (task->tk_status < 0) {
1226 req->wb_file->f_error = task->tk_status;
1227 nfs_inode_remove_request(req);
1228 dprintk(", error = %d\n", task->tk_status);
1232 /* Okay, COMMIT succeeded, apparently. Check the verifier
1233 * returned by the server against all stored verfs. */
1234 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1235 /* We have a match */
1236 nfs_inode_remove_request(req);
1240 /* We have a mismatch. Write the page again */
1241 dprintk(" mismatch\n");
1242 nfs_mark_request_dirty(req);
1244 nfs_unlock_request(req);
/*
 * Collect dirty requests of inode and send them to the server.
 *
 * @inode:     inode to flush
 * @file:      if non-NULL, restrict to requests from this file
 * @idx_start: first page index of the range
 * @npages:    range length in pages
 * @how:       FLUSH_* flags passed on to nfs_flush_list()
 *
 * The matching requests are moved onto a local list under
 * nfs_wreq_lock by nfs_scan_dirty(), then written with
 * nfs_flush_list() using the server wpages as coalescing limit.
 * NOTE(review): the local declarations, the test on res before flushing
 * and the return value are not visible here - confirm.
 */
1249 int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start,
1250 unsigned int npages, int how)
1256 spin_lock(&nfs_wreq_lock);
1257 res = nfs_scan_dirty(inode, &head, file, idx_start, npages);
1258 spin_unlock(&nfs_wreq_lock);
1260 error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
1266 #ifdef CONFIG_NFS_V3
/*
 * Collect requests of inode that await COMMIT and commit them.
 *
 * @inode:     inode to commit
 * @file:      if non-NULL, restrict to requests from this file
 * @idx_start: first page index of the range
 * @npages:    range length in pages
 * @how:       FLUSH_* flags passed on to nfs_commit_list()
 *
 * The matching requests are moved onto a local list under
 * nfs_wreq_lock by nfs_scan_commit(), then committed with
 * nfs_commit_list().
 * NOTE(review): the local declarations, the test on res before
 * committing and the return value are not visible here - confirm.
 */
1267 int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start,
1268 unsigned int npages, int how)
1274 spin_lock(&nfs_wreq_lock);
1275 res = nfs_scan_commit(inode, &head, file, idx_start, npages);
1276 spin_unlock(&nfs_wreq_lock);
1278 error = nfs_commit_list(&head, how);
/*
 * Push out pending writes for a page range, repeating until no step
 * reports further work.
 *
 * @inode:     inode to sync (replaced by the inode of file on one path)
 * @file:      if non-NULL, restrict to requests from this file
 * @idx_start: first page index of the range
 * @npages:    range length in pages
 * @how:       FLUSH_* flags; FLUSH_WAIT is extracted into wait
 *
 * Each pass may wait on busy requests, flush dirty requests and, with
 * CONFIG_NFS_V3, commit unstable ones. The loop repeats while the last
 * step returned a positive value.
 * NOTE(review): the loop header, the conditions guarding each of the
 * three steps (including how wait is used), the guard on the inode
 * reassignment and the return statement are not visible here - confirm.
 */
1285 int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start,
1286 unsigned int npages, int how)
1291 wait = how & FLUSH_WAIT;
1295 inode = file->f_dentry->d_inode;
1300 error = nfs_wait_on_requests(inode, file, idx_start, npages);
1302 error = nfs_flush_file(inode, file, idx_start, npages, how);
1303 #ifdef CONFIG_NFS_V3
1305 error = nfs_commit_file(inode, file, idx_start, npages, how);
1307 } while (error > 0);
/*
 * Create the slab cache used for struct nfs_write_data, named
 * nfs_write_data and aligned with SLAB_HWCACHE_ALIGN, and store it in
 * nfs_wdata_cachep. A NULL result is treated as failure.
 * NOTE(review): the remaining kmem_cache_create() arguments and both
 * return values are not visible here - confirm.
 */
1311 int nfs_init_writepagecache(void)
1313 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1314 sizeof(struct nfs_write_data),
1315 0, SLAB_HWCACHE_ALIGN,
1317 if (nfs_wdata_cachep == NULL)
/*
 * Destroy the nfs_write_data slab cache. A non-zero result from
 * kmem_cache_destroy() is logged at KERN_INFO level as a sign that some
 * structures were still allocated.
 */
1323 void nfs_destroy_writepagecache(void)
1325 if (kmem_cache_destroy(nfs_wdata_cachep))
1326 printk(KERN_INFO "nfs_write_data: not all structures were freed\n");