/* Source: linux-2.4.21-pre4, mm/shmem.c (gitweb extraction header removed) */
1 /*
2  * Resizable virtual memory filesystem for Linux.
3  *
4  * Copyright (C) 2000 Linus Torvalds.
5  *               2000 Transmeta Corp.
6  *               2000-2001 Christoph Rohland
7  *               2000-2001 SAP AG
8  * 
9  * This file is released under the GPL.
10  */
11
12 /*
13  * This virtual memory filesystem is heavily based on the ramfs. It
14  * extends ramfs by the ability to use swap and honor resource limits
15  * which makes it a completely usable filesystem.
16  */
17
18 #include <linux/config.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/devfs_fs_kernel.h>
22 #include <linux/fs.h>
23 #include <linux/mm.h>
24 #include <linux/file.h>
25 #include <linux/swap.h>
26 #include <linux/pagemap.h>
27 #include <linux/string.h>
28 #include <linux/locks.h>
29 #include <linux/smp_lock.h>
30
31 #include <asm/uaccess.h>
32
/* This magic number is used in glibc for posix shared memory */
#define TMPFS_MAGIC	0x01021994

/* Swap entries per indirect-block page, and 512-byte blocks per page. */
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)

/* Largest page index reachable through the direct + doubly + triply
   indirect swap vector (see shmem_swp_entry for the layout). */
#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * (ENTRIES_PER_PAGE/2) * (ENTRIES_PER_PAGE+1))
#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
/* Size in pages, rounded up, for VM accounting. */
#define VM_ACCT(size)    (((size) + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Per-mount state lives in the superblock union. */
#define SHMEM_SB(sb) (&sb->u.shmem_sb)

static struct super_operations shmem_ops;
static struct address_space_operations shmem_aops;
static struct file_operations shmem_file_operations;
static struct inode_operations shmem_inode_operations;
static struct inode_operations shmem_dir_inode_operations;
static struct vm_operations_struct shmem_vm_ops;

/* All regular shmem inodes, scanned by shmem_unuse(); guarded by shmem_ilock. */
LIST_HEAD (shmem_inodes);
static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */

static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long);
/*
 * shmem_recalc_inode - recalculate the size of an inode
 *
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop pages
 * behind our back
 *
 * But we know that normally
 * inodes->i_blocks/BLOCKS_PER_PAGE ==
 *			inode->i_mapping->nrpages + info->swapped
 *
 * So the mm freed
 * inodes->i_blocks/BLOCKS_PER_PAGE -
 *			(inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */

static void shmem_recalc_inode(struct inode * inode)
{
	unsigned long freed;

	/* Pages the VM reclaimed behind our back: accounted blocks minus
	   what is still present in the page cache or on swap. */
	freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
		(inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
	if (freed){
		struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
		inode->i_blocks -= freed*BLOCKS_PER_PAGE;
		/* Return the reclaimed pages to the mount's free pool. */
		spin_lock (&sbinfo->stat_lock);
		sbinfo->free_blocks += freed;
		spin_unlock (&sbinfo->stat_lock);
	}
}
95
/*
 * shmem_swp_entry - find the swap vector position in the info structure
 *
 * @info:  info structure for the inode
 * @index: index of the page to find
 * @page:  optional page to add to the structure. Has to be preset to
 *         all zeros
 *
 * If there is no space allocated yet it will return -ENOMEM when
 * page == 0 else it will use the page for the needed block.
 *
 * The swap vector is organized the following way:
 *
 * There are SHMEM_NR_DIRECT entries directly stored in the
 * shmem_inode_info structure. So small files do not need an additional
 * allocation.
 *
 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 * i_indirect which points to a page which holds in the first half
 * doubly indirect blocks, in the second half triple indirect blocks:
 *
 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 * following layout (for SHMEM_NR_DIRECT == 16):
 *
 * i_indirect -> dir --> 16-19
 *            |      +-> 20-23
 *            |
 *            +-->dir2 --> 24-27
 *            |        +-> 28-31
 *            |        +-> 32-35
 *            |        +-> 36-39
 *            |
 *            +-->dir3 --> 40-43
 *                     +-> 44-47
 *                     +-> 48-51
 *                     +-> 52-55
 */
static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, unsigned long page)
{
	unsigned long offset;
	void **dir;

	/* The first SHMEM_NR_DIRECT entries live in the inode info itself. */
	if (index < SHMEM_NR_DIRECT)
		return info->i_direct+index;

	/* Split the remainder into a directory-page number and the
	   offset of the entry within that directory page. */
	index -= SHMEM_NR_DIRECT;
	offset = index % ENTRIES_PER_PAGE;
	index /= ENTRIES_PER_PAGE;

	if (!info->i_indirect) {
		/* Consume the caller-supplied page (possibly 0) as the
		   top-level block; caller must retry with a fresh page. */
		info->i_indirect = (void *) page;
		return ERR_PTR(-ENOMEM);
	}

	dir = info->i_indirect + index;
	if (index >= ENTRIES_PER_PAGE/2) {
		/* Second half of i_indirect: triple indirect blocks. */
		index -= ENTRIES_PER_PAGE/2;
		dir = info->i_indirect + ENTRIES_PER_PAGE/2
			+ index/ENTRIES_PER_PAGE;
		index %= ENTRIES_PER_PAGE;

		if(!*dir) {
			*dir = (void *) page;
			/* We return since we will need another page
			   in the next step */
			return ERR_PTR(-ENOMEM);
		}
		dir = ((void **)*dir) + index;
	}
	if (!*dir) {
		if (!page)
			return ERR_PTR(-ENOMEM);
		*dir = (void *)page;
	}
	return ((swp_entry_t *)*dir) + offset;
}
175
/*
 * shmem_alloc_entry - get the position of the swap entry for the
 *                     page. If it does not exist allocate the entry
 *
 * @info:       info structure for the inode
 * @index:      index of the page to find
 *
 * Returns ERR_PTR(-EFBIG) if the index exceeds SHMEM_MAX_INDEX, and
 * ERR_PTR(-ENOMEM) if a needed directory page cannot be allocated.
 */
static inline swp_entry_t * shmem_alloc_entry (struct shmem_inode_info *info, unsigned long index)
{
	unsigned long page = 0;
	swp_entry_t * res;

	if (index >= SHMEM_MAX_INDEX)
		return ERR_PTR(-EFBIG);

	/* Grow the logical end of the swap vector. */
	if (info->next_index <= index)
		info->next_index = index + 1;

	/* shmem_swp_entry() consumes at most one zeroed page per call
	   (every -ENOMEM return with page != 0 has stored the page in
	   the vector), so keep feeding it pages until the path down to
	   the entry exists, or get_zeroed_page() fails and the final
	   call returns -ENOMEM with page == 0. */
	while ((res = shmem_swp_entry(info,index,page)) == ERR_PTR(-ENOMEM)) {
		page = get_zeroed_page(GFP_USER);
		if (!page)
			break;
	}
	return res;
}
201
202 /*
203  * shmem_free_swp - free some swap entries in a directory
204  *
205  * @dir:   pointer to the directory
206  * @count: number of entries to scan
207  */
208 static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
209 {
210         swp_entry_t *ptr, entry;
211         int freed = 0;
212
213         for (ptr = dir; ptr < dir + count; ptr++) {
214                 if (!ptr->val)
215                         continue;
216                 entry = *ptr;
217                 *ptr = (swp_entry_t){0};
218                 freed++;
219                 free_swap_and_cache(entry);
220         }
221         return freed;
222 }
223
/*
 * shmem_truncate_direct - free the swap entries of a whole doubly
 *                         indirect block
 *
 * @dir:        pointer to the pointer to the block
 * @start:      offset to start from (in pages)
 * @len:        how many pages are stored in this block
 *
 * Returns the number of freed swap entries.
 */

static inline unsigned long
shmem_truncate_direct(swp_entry_t *** dir, unsigned long start, unsigned long len) {
	swp_entry_t **last, **ptr;
	unsigned long off, freed = 0;

	if (!*dir)
		return 0;

	/* One past the last directory page covering the len entries. */
	last = *dir + (len + ENTRIES_PER_PAGE-1) / ENTRIES_PER_PAGE;
	/* off is nonzero only for the first directory page visited. */
	off = start % ENTRIES_PER_PAGE;

	for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++) {
		if (!*ptr) {
			off = 0;
			continue;
		}

		if (!off) {
			/* Entire directory page goes away. */
			freed += shmem_free_swp(*ptr, ENTRIES_PER_PAGE);
			free_page ((unsigned long) *ptr);
			*ptr = 0;
		} else {
			/* Partial first page: free only the tail, keep it. */
			freed += shmem_free_swp(*ptr+off,ENTRIES_PER_PAGE-off);
			off = 0;
		}
	}

	/* Truncating from offset 0 empties the block completely. */
	if (!start) {
		free_page((unsigned long) *dir);
		*dir = 0;
	}
	return freed;
}
268
/*
 * shmem_truncate_indirect - truncate an inode
 *
 * @info:  the info structure of the inode
 * @index: the index to truncate
 *
 * This function locates the last doubly indirect block and calls
 * then shmem_truncate_direct to do the real work
 */
static inline unsigned long
shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
{
	swp_entry_t ***base;
	unsigned long baseidx, len, start;
	unsigned long max = info->next_index-1;

	/* Everything still within the direct entries: free them inline. */
	if (max < SHMEM_NR_DIRECT) {
		info->next_index = index;
		return shmem_free_swp(info->i_direct + index,
				      SHMEM_NR_DIRECT - index);
	}

	if (max < ENTRIES_PER_PAGE * ENTRIES_PER_PAGE/2 + SHMEM_NR_DIRECT) {
		/* Last used entry lies in the doubly-indirect half. */
		max -= SHMEM_NR_DIRECT;
		base = (swp_entry_t ***) &info->i_indirect;
		baseidx = SHMEM_NR_DIRECT;
		len = max+1;
	} else {
		/* Last used entry lies in a triple-indirect block. */
		max -= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2+SHMEM_NR_DIRECT;
		if (max >= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2)
			BUG();

		baseidx = max & ~(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE-1);
		base = (swp_entry_t ***) info->i_indirect + ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGE/ENTRIES_PER_PAGE ;
		len = max - baseidx + 1;
		baseidx += ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2+SHMEM_NR_DIRECT;
	}

	/* Lower next_index so the caller's while loop converges: each
	   call disposes of the current last block. */
	if (index > baseidx) {
		info->next_index = index;
		start = index - baseidx;
	} else {
		info->next_index = baseidx;
		start = 0;
	}
	return shmem_truncate_direct(base, start, len);
}
316
/*
 * shmem_truncate - truncate a shmem inode to its current i_size
 *
 * Zeroes the tail of a partially-covered last page, then repeatedly
 * frees swap-vector blocks from the end until next_index reaches the
 * new end of file. Called by the VFS with i_size already set.
 */
static void shmem_truncate (struct inode * inode)
{
	unsigned long index;
	unsigned long partial;
	unsigned long freed = 0;
	struct shmem_inode_info * info = SHMEM_I(inode);

	down(&info->sem);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	spin_lock (&info->lock);
	/* First page index fully beyond EOF, and bytes used in the
	   (possibly partial) last page. */
	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	partial = inode->i_size & ~PAGE_CACHE_MASK;

	if (partial) {
		swp_entry_t *entry = shmem_swp_entry(info, index-1, 0);
		struct page *page;
		/*
		 * This check is racy: it's faintly possible that page
		 * was assigned to swap during truncate_inode_pages,
		 * and now assigned to file; but better than nothing.
		 */
		if (!IS_ERR(entry) && entry->val) {
			/* Last page is on swap: bring it back so the
			   bytes beyond i_size can be cleared. */
			spin_unlock(&info->lock);
			page = shmem_getpage_locked(info, inode, index-1);
			if (!IS_ERR(page)) {
				memclear_highpage_flush(page, partial,
					PAGE_CACHE_SIZE - partial);
				UnlockPage(page);
				page_cache_release(page);
			}
			spin_lock(&info->lock);
		}
	}

	/* Each call frees the current last block and lowers next_index. */
	while (index < info->next_index)
		freed += shmem_truncate_indirect(info, index);

	info->swapped -= freed;
	shmem_recalc_inode(inode);
	spin_unlock (&info->lock);
	up(&info->sem);
}
359
/*
 * shmem_delete_inode - final removal of an inode with no more users
 *
 * Regular shmem files (identified by their truncate op) are taken off
 * the global shmem_inodes list and have all pages and swap entries
 * freed; the inode slot is then returned to the mount's free pool.
 */
static void shmem_delete_inode(struct inode * inode)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

	if (inode->i_op->truncate == shmem_truncate) {
		spin_lock (&shmem_ilock);
		list_del (&SHMEM_I(inode)->list);
		spin_unlock (&shmem_ilock);
		/* Truncate to zero drops every page and swap entry. */
		inode->i_size = 0;
		shmem_truncate (inode);
	}
	spin_lock (&sbinfo->stat_lock);
	sbinfo->free_inodes++;
	spin_unlock (&sbinfo->stat_lock);
	clear_inode(inode);
}
376
377 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *ptr, swp_entry_t *eptr)
378 {
379         swp_entry_t *test;
380
381         for (test = ptr; test < eptr; test++) {
382                 if (test->val == entry.val)
383                         return test - ptr;
384         }
385         return -1;
386 }
387
/*
 * shmem_unuse_inode - try to give a swapped-out page back to one inode
 *
 * Returns 1 (and re-files the page into the inode's page cache) if
 * this inode's swap vector holds @entry, 0 otherwise.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
	swp_entry_t *ptr;
	unsigned long idx;
	int offset;

	idx = 0;
	ptr = info->i_direct;
	spin_lock (&info->lock);
	/* First scan the direct entries ... */
	offset = info->next_index;
	if (offset > SHMEM_NR_DIRECT)
		offset = SHMEM_NR_DIRECT;
	offset = shmem_find_swp(entry, ptr, ptr + offset);
	if (offset >= 0)
		goto found;

	/* ... then every populated indirect block, one page at a time
	   (passing page == 0 means nothing gets allocated here). */
	for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
	     idx += ENTRIES_PER_PAGE) {
		ptr = shmem_swp_entry(info, idx, 0);
		if (IS_ERR(ptr))
			continue;
		offset = info->next_index - idx;
		if (offset > ENTRIES_PER_PAGE)
			offset = ENTRIES_PER_PAGE;
		offset = shmem_find_swp(entry, ptr, ptr + offset);
		if (offset >= 0)
			goto found;
	}
	spin_unlock (&info->lock);
	return 0;
found:
	/* Move the page from the swap cache back into the file's page
	   cache at the index (idx + offset) where the entry was found. */
	swap_free(entry);
	ptr[offset] = (swp_entry_t) {0};
	delete_from_swap_cache(page);
	add_to_page_cache(page, info->inode->i_mapping, offset + idx);
	SetPageDirty(page);
	SetPageUptodate(page);
	info->swapped--;
	spin_unlock(&info->lock);
	return 1;
}
429
/*
 * shmem_unuse() search for an eventually swapped out shmem page.
 *
 * @entry: the swap entry being swapped back in
 * @page:  the page (in swap cache) holding its data
 *
 * Walks every shmem inode until one claims the entry. The list head
 * is then rotated so the next search starts at the matching inode.
 */
void shmem_unuse(swp_entry_t entry, struct page *page)
{
	struct list_head *p;
	struct shmem_inode_info * info;

	spin_lock (&shmem_ilock);
	list_for_each(p, &shmem_inodes) {
		info = list_entry(p, struct shmem_inode_info, list);

		/* Skip inodes with nothing on swap. */
		if (info->swapped && shmem_unuse_inode(info, entry, page)) {
			/* move head to start search for next from here */
			list_del(&shmem_inodes);
			list_add_tail(&shmem_inodes, p);
			break;
		}
	}
	spin_unlock (&shmem_ilock);
}
451
/*
 * Move the page from the page cache to the swap cache.
 *
 * The page lock prevents multiple occurrences of shmem_writepage at
 * once.  We still need to guard against racing with
 * shmem_getpage_locked().
 */
static int shmem_writepage(struct page * page)
{
	struct shmem_inode_info *info;
	swp_entry_t *entry, swap;
	struct address_space *mapping;
	unsigned long index;
	struct inode *inode;

	if (!PageLocked(page))
		BUG();
	/* Only swap pages out under real memory pressure. */
	if (!PageLaunder(page))
		return fail_writepage(page);

	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	/* SHM_LOCKed segments never go to swap. */
	if (info->locked)
		return fail_writepage(page);
getswap:
	swap = get_swap_page();
	if (!swap.val)
		return fail_writepage(page);

	spin_lock(&info->lock);
	entry = shmem_swp_entry(info, index, 0);
	if (IS_ERR(entry))	/* this had been allocated on page allocation */
		BUG();
	shmem_recalc_inode(inode);
	/* The slot must be empty: the page was in the page cache. */
	if (entry->val)
		BUG();

	/* Remove it from the page cache */
	remove_inode_page(page);
	page_cache_release(page);

	/* Add it to the swap cache */
	if (add_to_swap_cache(page, swap) != 0) {
		/*
		 * Raced with "speculative" read_swap_cache_async.
		 * Add page back to page cache, unref swap, try again.
		 */
		add_to_page_cache_locked(page, mapping, index);
		spin_unlock(&info->lock);
		swap_free(swap);
		goto getswap;
	}

	/* Record the swap location in the inode's swap vector. */
	*entry = swap;
	info->swapped++;
	spin_unlock(&info->lock);
	SetPageUptodate(page);
	set_page_dirty(page);
	UnlockPage(page);
	return 0;
}
515
/*
 * shmem_getpage_locked - either get the page from swap or allocate a new one
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache
 *
 * Called with the inode locked, so it cannot race with itself, but we
 * still need to guard against racing with shmem_writepage(), which
 * might be trying to move the page to the swap cache as we run.
 *
 * Returns the page locked with an extra reference held, or ERR_PTR.
 */
static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode * inode, unsigned long idx)
{
	struct address_space * mapping = inode->i_mapping;
	struct shmem_sb_info *sbinfo;
	struct page * page;
	swp_entry_t *entry;

repeat:
	/* Fast path: already in the page cache. */
	page = find_lock_page(mapping, idx);
	if (page)
		return page;

	/* Make sure the swap vector slot for idx exists up front, so
	   shmem_writepage() can rely on it later. */
	entry = shmem_alloc_entry (info, idx);
	if (IS_ERR(entry))
		return (void *)entry;

	spin_lock (&info->lock);

	/* The shmem_alloc_entry() call may have blocked, and
	 * shmem_writepage may have been moving a page between the page
	 * cache and swap cache.  We need to recheck the page cache
	 * under the protection of the info->lock spinlock. */

	page = find_get_page(mapping, idx);
	if (page) {
		if (TryLockPage(page))
			goto wait_retry;
		spin_unlock (&info->lock);
		return page;
	}

	shmem_recalc_inode(inode);
	if (entry->val) {
		unsigned long flags;

		/* Look it up and read it in.. */
		page = lookup_swap_cache(*entry);
		if (!page) {
			swp_entry_t swap = *entry;
			spin_unlock (&info->lock);
			swapin_readahead(*entry);
			page = read_swap_cache_async(*entry);
			if (!page) {
				/* Entry changed while unlocked: retry;
				   otherwise we are out of memory. */
				if (entry->val != swap.val)
					goto repeat;
				return ERR_PTR(-ENOMEM);
			}
			wait_on_page(page);
			if (!Page_Uptodate(page) && entry->val == swap.val) {
				page_cache_release(page);
				return ERR_PTR(-EIO);
			}

			/* Too bad we can't trust this page, because we
			 * dropped the info->lock spinlock */
			page_cache_release(page);
			goto repeat;
		}

		/* We have to this with page locked to prevent races */
		if (TryLockPage(page))
			goto wait_retry;

		/* Migrate swap cache -> page cache; force PG_dirty so
		   the data is not lost now that the swap copy is gone. */
		swap_free(*entry);
		*entry = (swp_entry_t) {0};
		delete_from_swap_cache(page);
		flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1));
		page->flags = flags | (1 << PG_dirty);
		add_to_page_cache_locked(page, mapping, idx);
		info->swapped--;
		spin_unlock (&info->lock);
	} else {
		/* Not on swap: charge a fresh block against the mount. */
		sbinfo = SHMEM_SB(inode->i_sb);
		spin_unlock (&info->lock);
		spin_lock (&sbinfo->stat_lock);
		if (sbinfo->free_blocks == 0)
			goto no_space;
		sbinfo->free_blocks--;
		spin_unlock (&sbinfo->stat_lock);

		/* Ok, get a new page.  We don't have to worry about the
		 * info->lock spinlock here: we cannot race against
		 * shm_writepage because we have already verified that
		 * there is no page present either in memory or in the
		 * swap cache, so we are guaranteed to be populating a
		 * new shm entry.  The inode semaphore we already hold
		 * is enough to make this atomic. */
		page = page_cache_alloc(mapping);
		if (!page)
			return ERR_PTR(-ENOMEM);
		clear_highpage(page);
		flush_dcache_page(page);
		inode->i_blocks += BLOCKS_PER_PAGE;
		add_to_page_cache (page, mapping, idx);
	}

	/* We have the page */
	SetPageUptodate(page);
	return page;
no_space:
	spin_unlock (&sbinfo->stat_lock);
	return ERR_PTR(-ENOSPC);

wait_retry:
	/* Someone else holds the page lock: wait for it, then retry. */
	spin_unlock (&info->lock);
	wait_on_page(page);
	page_cache_release(page);
	goto repeat;
}
636
/*
 * shmem_getpage - look up (or allocate) and unlock the page at @idx
 *
 * Returns 0 with *ptr set to the page on success. On failure returns
 * the error, with *ptr holding the NOPAGE_SIGBUS / NOPAGE_OOM code
 * for the fault handler. Access beyond i_size yields -EFAULT.
 */
static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	int error;

	down (&info->sem);
	*ptr = ERR_PTR(-EFAULT);
	if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE)
		goto failed;

	*ptr = shmem_getpage_locked(info, inode, idx);
	if (IS_ERR (*ptr))
		goto failed;

	UnlockPage(*ptr);
	up (&info->sem);
	return 0;
failed:
	up (&info->sem);
	error = PTR_ERR(*ptr);
	*ptr = NOPAGE_SIGBUS;
	if (error == -ENOMEM)
		*ptr = NOPAGE_OOM;
	return error;
}
662
/*
 * shmem_nopage - page fault handler for shmem mappings
 *
 * Translates the faulting address into a file page index and fetches
 * the page. On error, shmem_getpage() has already stored the proper
 * NOPAGE_* code in page.
 */
struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused)
{
	struct page * page;
	unsigned int idx;
	struct inode * inode = vma->vm_file->f_dentry->d_inode;

	idx = (address - vma->vm_start) >> PAGE_CACHE_SHIFT;
	idx += vma->vm_pgoff;

	if (shmem_getpage(inode, idx, &page))
		return page;

	flush_page_to_ram(page);
	return(page);
}
678
679 void shmem_lock(struct file * file, int lock)
680 {
681         struct inode * inode = file->f_dentry->d_inode;
682         struct shmem_inode_info * info = SHMEM_I(inode);
683
684         down(&info->sem);
685         info->locked = lock;
686         up(&info->sem);
687 }
688
689 static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
690 {
691         struct vm_operations_struct * ops;
692         struct inode *inode = file->f_dentry->d_inode;
693
694         ops = &shmem_vm_ops;
695         if (!inode->i_sb || !S_ISREG(inode->i_mode))
696                 return -EACCES;
697         UPDATE_ATIME(inode);
698         vma->vm_ops = ops;
699         return 0;
700 }
701
702 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
703 {
704         struct inode * inode;
705         struct shmem_inode_info *info;
706         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
707
708         spin_lock (&sbinfo->stat_lock);
709         if (!sbinfo->free_inodes) {
710                 spin_unlock (&sbinfo->stat_lock);
711                 return NULL;
712         }
713         sbinfo->free_inodes--;
714         spin_unlock (&sbinfo->stat_lock);
715
716         inode = new_inode(sb);
717         if (inode) {
718                 inode->i_mode = mode;
719                 inode->i_uid = current->fsuid;
720                 inode->i_gid = current->fsgid;
721                 inode->i_blksize = PAGE_CACHE_SIZE;
722                 inode->i_blocks = 0;
723                 inode->i_rdev = NODEV;
724                 inode->i_mapping->a_ops = &shmem_aops;
725                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
726                 info = SHMEM_I(inode);
727                 info->inode = inode;
728                 spin_lock_init (&info->lock);
729                 sema_init (&info->sem, 1);
730                 switch (mode & S_IFMT) {
731                 default:
732                         init_special_inode(inode, mode, dev);
733                         break;
734                 case S_IFREG:
735                         inode->i_op = &shmem_inode_operations;
736                         inode->i_fop = &shmem_file_operations;
737                         spin_lock (&shmem_ilock);
738                         list_add_tail(&info->list, &shmem_inodes);
739                         spin_unlock (&shmem_ilock);
740                         break;
741                 case S_IFDIR:
742                         inode->i_nlink++;
743                         /* Some things misbehave if size == 0 on a directory */
744                         inode->i_size = 2 * BOGO_DIRENT_SIZE;
745                         inode->i_op = &shmem_dir_inode_operations;
746                         inode->i_fop = &dcache_dir_ops;
747                         break;
748                 case S_IFLNK:
749                         break;
750                 }
751         }
752         return inode;
753 }
754
755 static int shmem_set_size(struct shmem_sb_info *info,
756                           unsigned long max_blocks, unsigned long max_inodes)
757 {
758         int error;
759         unsigned long blocks, inodes;
760
761         spin_lock(&info->stat_lock);
762         blocks = info->max_blocks - info->free_blocks;
763         inodes = info->max_inodes - info->free_inodes;
764         error = -EINVAL;
765         if (max_blocks < blocks)
766                 goto out;
767         if (max_inodes < inodes)
768                 goto out;
769         error = 0;
770         info->max_blocks  = max_blocks;
771         info->free_blocks = max_blocks - blocks;
772         info->max_inodes  = max_inodes;
773         info->free_inodes = max_inodes - inodes;
774 out:
775         spin_unlock(&info->stat_lock);
776         return error;
777 }
778
779 #ifdef CONFIG_TMPFS
780
781 static struct inode_operations shmem_symlink_inode_operations;
782 static struct inode_operations shmem_symlink_inline_operations;
783
/*
 * shmem_file_write - write(2) for tmpfs files
 *
 * Copies userspace data page by page into the page cache, extending
 * i_size as needed and honouring RLIMIT_FSIZE and O_APPEND. Returns
 * the number of bytes written, or a negative error.
 */
static ssize_t
shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
{
	struct inode	*inode = file->f_dentry->d_inode;
	struct shmem_inode_info *info;
	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
	loff_t		pos;
	struct page	*page;
	unsigned long	written;
	long		status;
	int		err;

	if ((ssize_t) count < 0)
		return -EINVAL;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	/* i_sem serializes writers against each other and truncate. */
	down(&inode->i_sem);

	pos = *ppos;
	err = -EINVAL;
	if (pos < 0)
		goto out;

	err = file->f_error;
	if (err) {
		file->f_error = 0;
		goto out;
	}

	written = 0;

	if (file->f_flags & O_APPEND)
		pos = inode->i_size;

	/*
	 * Check whether we've reached the file size limit.
	 */
	err = -EFBIG;
	if (limit != RLIM_INFINITY) {
		if (pos >= limit) {
			send_sig(SIGXFSZ, current, 0);
			goto out;
		}
		if (count > limit - pos) {
			/* Partial write up to the limit is still allowed. */
			send_sig(SIGXFSZ, current, 0);
			count = limit - pos;
		}
	}

	status	= 0;
	if (count) {
		remove_suid(inode);
		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	}

	while (count) {
		unsigned long bytes, index, offset;
		char *kaddr;

		/*
		 * Try to find the page in the cache. If it isn't there,
		 * allocate a free page.
		 */
		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count) {
			bytes = count;
		}

		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 */
		{ volatile unsigned char dummy;
			__get_user(dummy, buf);
			__get_user(dummy, buf+bytes-1);
		}

		info = SHMEM_I(inode);
		down (&info->sem);
		page = shmem_getpage_locked(info, inode, index);
		up (&info->sem);

		status = PTR_ERR(page);
		if (IS_ERR(page))
			break;

		/* We have exclusive IO access to the page.. */
		if (!PageLocked(page)) {
			PAGE_BUG(page);
		}

		kaddr = kmap(page);
		/* status is the number of bytes NOT copied. */
		status = copy_from_user(kaddr+offset, buf, bytes);
		kunmap(page);
		if (status)
			goto fail_write;

		flush_dcache_page(page);
		if (bytes > 0) {
			SetPageDirty(page);
			written += bytes;
			count -= bytes;
			pos += bytes;
			buf += bytes;
			/* Grow the file when writing past the old end. */
			if (pos > inode->i_size)
				inode->i_size = pos;
		}
unlock:
		/* Mark it unlocked again and drop the page.. */
		UnlockPage(page);
		page_cache_release(page);

		if (status < 0)
			break;
	}
	*ppos = pos;

	/* Report bytes written if any; otherwise the last error. */
	err = written ? written : status;
out:
	up(&inode->i_sem);
	return err;
fail_write:
	/* Partial copy from userspace: the page's contents are not
	   trustworthy any more. */
	status = -EFAULT;
	ClearPageUptodate(page);
	goto unlock;
}
916
/*
 * Copy data from a tmpfs file into the read_descriptor's user buffer,
 * one page-cache page at a time, starting at *ppos.  On return *ppos
 * has been advanced past the bytes consumed and the inode atime is
 * updated; errors are reported through desc->error.
 */
static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	unsigned long index, offset;

	index = *ppos >> PAGE_CACHE_SHIFT;	/* page number */
	offset = *ppos & ~PAGE_CACHE_MASK;	/* byte offset within that page */

	for (;;) {
		struct page *page;
		unsigned long end_index, nr, ret;

		/* Stop at EOF; the last page may be only partially valid */
		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
		if (index > end_index)
			break;
		nr = PAGE_CACHE_SIZE;
		if (index == end_index) {
			nr = inode->i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}

		nr = nr - offset;

		if ((desc->error = shmem_getpage(inode, index, &page)))
			break;

		/* Keep shared user mappings coherent before copying out */
		if (mapping->i_mmap_shared != NULL)
			flush_dcache_page(page);

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = file_read_actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		page_cache_release(page);
		/* Short copy (e.g. -EFAULT) or buffer exhausted: stop */
		if (ret != nr || !desc->count)
			break;
	}

	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	UPDATE_ATIME(inode);
}
971
972 static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
973 {
974         ssize_t retval;
975
976         retval = -EFAULT;
977         if (access_ok(VERIFY_WRITE, buf, count)) {
978                 retval = 0;
979
980                 if (count) {
981                         read_descriptor_t desc;
982
983                         desc.written = 0;
984                         desc.count = count;
985                         desc.buf = buf;
986                         desc.error = 0;
987                         do_shmem_file_read(filp, ppos, &desc);
988
989                         retval = desc.written;
990                         if (!retval)
991                                 retval = desc.error;
992                 }
993         }
994         return retval;
995 }
996
997 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
998 {
999         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1000
1001         buf->f_type = TMPFS_MAGIC;
1002         buf->f_bsize = PAGE_CACHE_SIZE;
1003         spin_lock (&sbinfo->stat_lock);
1004         buf->f_blocks = sbinfo->max_blocks;
1005         buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1006         buf->f_files = sbinfo->max_inodes;
1007         buf->f_ffree = sbinfo->free_inodes;
1008         spin_unlock (&sbinfo->stat_lock);
1009         buf->f_namelen = NAME_MAX;
1010         return 0;
1011 }
1012
/*
 * Lookup the data. This is trivial - if the dentry didn't already
 * exist, we know it is negative.  tmpfs keeps all of its live
 * dentries in the dcache, so a miss cannot name an existing file.
 */
static struct dentry * shmem_lookup(struct inode *dir, struct dentry *dentry)
{
	d_add(dentry, NULL);	/* hash it with a NULL inode: negative dentry */
	return NULL;
}
1022
1023 /*
1024  * File creation. Allocate an inode, and we're done..
1025  */
1026 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
1027 {
1028         struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
1029         int error = -ENOSPC;
1030
1031         if (inode) {
1032                 dir->i_size += BOGO_DIRENT_SIZE;
1033                 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1034                 d_instantiate(dentry, inode);
1035                 dget(dentry); /* Extra count - pin the dentry in core */
1036                 error = 0;
1037         }
1038         return error;
1039 }
1040
1041 static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1042 {
1043         int error;
1044
1045         if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1046                 return error;
1047         dir->i_nlink++;
1048         return 0;
1049 }
1050
/* Regular-file creation is just mknod with S_IFREG and no device */
static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
{
	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
}
1055
/*
 * Link a file..
 *
 * Refuses hard links to directories; otherwise bumps the inode's link
 * and reference counts and instantiates the new dentry.
 */
static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry)
{
	struct inode *inode = old_dentry->d_inode;

	if (S_ISDIR(inode->i_mode))
		return -EPERM;

	dir->i_size += BOGO_DIRENT_SIZE;	/* one more pretend dirent */
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	inode->i_nlink++;
	atomic_inc(&inode->i_count);	/* New dentry reference */
	dget(dentry);		/* Extra pinning count for the created dentry */
	d_instantiate(dentry, inode);
	return 0;
}
1074
/* A child dentry "exists" if it has an inode and is still hashed */
static inline int shmem_positive(struct dentry *dentry)
{
	return dentry->d_inode && !d_unhashed(dentry);
}
1079
1080 /*
1081  * Check that a directory is empty (this works
1082  * for regular files too, they'll just always be
1083  * considered empty..).
1084  *
1085  * Note that an empty directory can still have
1086  * children, they just all have to be negative..
1087  */
1088 static int shmem_empty(struct dentry *dentry)
1089 {
1090         struct list_head *list;
1091
1092         spin_lock(&dcache_lock);
1093         list = dentry->d_subdirs.next;
1094
1095         while (list != &dentry->d_subdirs) {
1096                 struct dentry *de = list_entry(list, struct dentry, d_child);
1097
1098                 if (shmem_positive(de)) {
1099                         spin_unlock(&dcache_lock);
1100                         return 0;
1101                 }
1102                 list = list->next;
1103         }
1104         spin_unlock(&dcache_lock);
1105         return 1;
1106 }
1107
/*
 * Remove a directory entry: undo the bogo dirent accounting, drop the
 * inode's link count, and release the pinning reference taken when the
 * dentry was created (the final dput may free dentry and inode).
 */
static int shmem_unlink(struct inode * dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	inode->i_nlink--;
	dput(dentry);	/* Undo the count from "create" - this does all the work */
	return 0;
}
1118
/*
 * rmdir: only empty directories may be removed; the victim's ".."
 * entry no longer references the parent, hence dir->i_nlink--.
 *
 * NOTE(review): shmem_unlink drops the victim's own i_nlink only
 * once, which presumably leaves a directory's count non-zero; this
 * looks harmless here since put_inode is force_delete, but confirm.
 */
static int shmem_rmdir(struct inode * dir, struct dentry *dentry)
{
	if (!shmem_empty(dentry))
		return -ENOTEMPTY;

	dir->i_nlink--;
	return shmem_unlink(dir, dentry);
}
1127
/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly free's it when it
 * gets overwritten.
 */
static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
{
	struct inode *inode = old_dentry->d_inode;
	int they_are_dirs = S_ISDIR(inode->i_mode);

	/* Refuse to clobber a non-empty target directory */
	if (!shmem_empty(new_dentry)) 
		return -ENOTEMPTY;

	if (new_dentry->d_inode) {
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs)
			old_dir->i_nlink--;	/* moved dir's ".." leaves old_dir */
	} else if (they_are_dirs) {
		/* ".." of the moved directory migrates old_dir -> new_dir */
		old_dir->i_nlink--;
		new_dir->i_nlink++;
	}

	/* Move the bogo dirent accounting and stamp all three inodes */
	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	inode->i_ctime = CURRENT_TIME;
	return 0;
}
1158
/*
 * Create a symlink.  Short targets are stored inline in the inode's
 * shmem_inode_info; longer ones get a page-cache page of their own.
 *
 * NOTE(review): the inline case memcpy()s the target string over the
 * shmem_inode_info structure itself - presumably safe because no
 * other info fields are used for inline symlinks; worth confirming.
 */
static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
{
	int len;
	struct inode *inode;
	struct page *page;
	char *kaddr;
	struct shmem_inode_info * info;

	len = strlen(symname) + 1;	/* include the trailing NUL */
	if (len > PAGE_CACHE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
	if (!inode)
		return -ENOSPC;

	info = SHMEM_I(inode);
	inode->i_size = len-1;	/* i_size excludes the NUL */
	if (len <= sizeof(struct shmem_inode_info)) {
		/* do it inline */
		memcpy(info, symname, len);
		inode->i_op = &shmem_symlink_inline_operations;
	} else {
		down(&info->sem);
		page = shmem_getpage_locked(info, inode, 0);
		if (IS_ERR(page)) {
			up(&info->sem);
			iput(inode);
			return PTR_ERR(page);
		}
		inode->i_op = &shmem_symlink_inode_operations;
		/* put this inode on the global shmem_inodes list */
		spin_lock (&shmem_ilock);
		list_add_tail(&info->list, &shmem_inodes);
		spin_unlock (&shmem_ilock);
		kaddr = kmap(page);
		memcpy(kaddr, symname, len);
		kunmap(page);
		SetPageDirty(page);
		UnlockPage(page);
		page_cache_release(page);
		up(&info->sem);
	}
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	d_instantiate(dentry, inode);
	dget(dentry);	/* pin the dentry, as shmem_mknod does */
	return 0;
}
1207
/* Inline symlink: the target string lives in the shmem_inode_info */
static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
{
	return vfs_readlink(dentry,buffer,buflen, (const char *)SHMEM_I(dentry->d_inode));
}
1212
/* Inline symlink traversal: hand the in-inode target to the VFS */
static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
{
	return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
}
1217
1218 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
1219 {
1220         struct page * page;
1221         int res = shmem_getpage(dentry->d_inode, 0, &page);
1222
1223         if (res)
1224                 return res;
1225
1226         res = vfs_readlink(dentry,buffer,buflen, kmap(page));
1227         kunmap(page);
1228         page_cache_release(page);
1229         return res;
1230 }
1231
1232 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1233 {
1234         struct page * page;
1235         int res = shmem_getpage(dentry->d_inode, 0, &page);
1236         if (res)
1237                 return res;
1238
1239         res = vfs_follow_link(nd, kmap(page));
1240         kunmap(page);
1241         page_cache_release(page);
1242         return res;
1243 }
1244
/* Short symlinks: target stored inline inside the inode info */
static struct inode_operations shmem_symlink_inline_operations = {
	readlink:	shmem_readlink_inline,
	follow_link:	shmem_follow_link_inline,
};
1249
/* Long symlinks: target stored in a page-cache page */
static struct inode_operations shmem_symlink_inode_operations = {
	truncate:	shmem_truncate,
	readlink:	shmem_readlink,
	follow_link:	shmem_follow_link,
};
1255
/*
 * Parse the mount option string "key=value,key=value,...".
 * mode/uid/gid may be NULL (remount), in which case those keys are
 * accepted but ignored.  Returns 0 on success, 1 on any parse error.
 *
 * NOTE(review): uses the kernel's strtok, which mutates 'options'
 * and keeps static state - presumably fine since mounts are
 * serialized, but not reentrant.
 */
static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long * blocks, unsigned long *inodes)
{
	char *this_char, *value, *rest;

	this_char = NULL;
	if ( options )
		this_char = strtok(options,",");
	for ( ; this_char; this_char = strtok(NULL,",")) {
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;	/* split "key=value" in place */
		} else {
			printk(KERN_ERR 
			    "tmpfs: No value for mount option '%s'\n", 
			    this_char);
			return 1;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest)
				goto bad_val;
			/* convert the byte count into page-sized blocks */
			*blocks = size >> PAGE_CACHE_SHIFT;
		} else if (!strcmp(this_char,"nr_blocks")) {
			*blocks = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			*inodes = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (!mode)
				continue;	/* not changeable on remount */
			*mode = simple_strtoul(value,&rest,8);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (!uid)
				continue;	/* not changeable on remount */
			*uid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (!gid)
				continue;	/* not changeable on remount */
			*gid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			return 1;
		}
	}
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", 
	       value, this_char);
	return 1;

}
1319
1320 static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
1321 {
1322         struct shmem_sb_info *sbinfo = &sb->u.shmem_sb;
1323         unsigned long max_blocks = sbinfo->max_blocks;
1324         unsigned long max_inodes = sbinfo->max_inodes;
1325
1326         if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1327                 return -EINVAL;
1328         return shmem_set_size(sbinfo, max_blocks, max_inodes);
1329 }
1330
/*
 * fsync(2): tmpfs pages live only in memory/swap, so there is
 * nothing to write back - report success immediately.
 */
int shmem_sync_file(struct file * file, struct dentry *dentry, int datasync)
{
	return 0;
}
1335 #endif
1336
/*
 * Fill in a tmpfs superblock: parse the mount options (size, mode,
 * uid, gid), set up the block/inode accounting, and allocate the
 * root inode and dentry.  Returns NULL on any failure.
 */
static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent)
{
	struct inode * inode;
	struct dentry * root;
	unsigned long blocks, inodes;
	int mode   = S_IRWXUGO | S_ISVTX;	/* world-writable + sticky, like /tmp */
	uid_t uid = current->fsuid;
	gid_t gid = current->fsgid;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	struct sysinfo si;

	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance
	 */
	si_meminfo(&si);
	blocks = inodes = si.totalram / 2;

#ifdef CONFIG_TMPFS
	if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, &inodes))
		return NULL;
#endif

	spin_lock_init (&sbinfo->stat_lock);
	sbinfo->max_blocks = blocks;
	sbinfo->free_blocks = blocks;
	sbinfo->max_inodes = inodes;
	sbinfo->free_inodes = inodes;
	sb->s_maxbytes = SHMEM_MAX_BYTES;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
	if (!inode)
		return NULL;

	/* Root directory ownership comes from the mounting process */
	inode->i_uid = uid;
	inode->i_gid = gid;
	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return NULL;
	}
	sb->s_root = root;
	return sb;
}
1384
1385
1386
/* Address-space ops: pages leave memory only via shmem_writepage */
static struct address_space_operations shmem_aops = {
	writepage:	shmem_writepage,
};
1390
/* File ops: read/write/fsync only exist when tmpfs is mountable */
static struct file_operations shmem_file_operations = {
	mmap:	shmem_mmap,
#ifdef CONFIG_TMPFS
	read:	shmem_file_read,
	write:	shmem_file_write,
	fsync:	shmem_sync_file,
#endif
};
1399
/* Regular-file inode ops: only truncate is non-default */
static struct inode_operations shmem_inode_operations = {
	truncate:	shmem_truncate,
};
1403
/* Directory inode ops (only needed when tmpfs is user-mountable) */
static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	create:		shmem_create,
	lookup:		shmem_lookup,
	link:		shmem_link,
	unlink:		shmem_unlink,
	symlink:	shmem_symlink,
	mkdir:		shmem_mkdir,
	rmdir:		shmem_rmdir,
	mknod:		shmem_mknod,
	rename:		shmem_rename,
#endif
};
1417
/* Superblock ops; put_inode is force_delete, so inodes die on last iput */
static struct super_operations shmem_ops = {
#ifdef CONFIG_TMPFS
	statfs:		shmem_statfs,
	remount_fs:	shmem_remount_fs,
#endif
	delete_inode:	shmem_delete_inode,
	put_inode:	force_delete,	
};
1426
/* VM ops for mmap()ed tmpfs files: demand-fault via shmem_nopage */
static struct vm_operations_struct shmem_vm_ops = {
	nopage:	shmem_nopage,
};
1430
#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
#else
/* Without CONFIG_TMPFS only the internal, unmountable instance exists */
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
#endif
/* Internal mount used by shmem_file_setup()/shmem_zero_setup() */
static struct vfsmount *shm_mnt;
1439
1440 static int __init init_shmem_fs(void)
1441 {
1442         int error;
1443         struct vfsmount * res;
1444
1445         if ((error = register_filesystem(&tmpfs_fs_type))) {
1446                 printk (KERN_ERR "Could not register tmpfs\n");
1447                 return error;
1448         }
1449 #ifdef CONFIG_TMPFS
1450         if ((error = register_filesystem(&shmem_fs_type))) {
1451                 printk (KERN_ERR "Could not register shm fs\n");
1452                 return error;
1453         }
1454         devfs_mk_dir (NULL, "shm", NULL);
1455 #endif
1456         res = kern_mount(&tmpfs_fs_type);
1457         if (IS_ERR (res)) {
1458                 printk (KERN_ERR "could not kern_mount tmpfs\n");
1459                 unregister_filesystem(&tmpfs_fs_type);
1460                 return PTR_ERR(res);
1461         }
1462         shm_mnt = res;
1463
1464         /* The internal instance should not do size checking */
1465         if ((error = shmem_set_size(SHMEM_SB(res->mnt_sb), ULONG_MAX, ULONG_MAX)))
1466                 printk (KERN_ERR "could not set limits on internal tmpfs\n");
1467
1468         return 0;
1469 }
1470
/*
 * Module unload: unregister the filesystem types and drop the
 * internal kernel mount taken in init_shmem_fs().
 */
static void __exit exit_shmem_fs(void)
{
#ifdef CONFIG_TMPFS
	unregister_filesystem(&shmem_fs_type);
#endif
	unregister_filesystem(&tmpfs_fs_type);
	mntput(shm_mnt);
}
1479
1480 module_init(init_shmem_fs)
1481 module_exit(exit_shmem_fs)
1482
/*
 * shmem_file_setup - get an unlinked file living in shmem fs
 *
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 *
 * Returns a struct file pinned to the internal tmpfs mount, or an
 * ERR_PTR on failure.  The inode has i_nlink 0, so it disappears as
 * soon as the last reference to the file is dropped.
 */
struct file *shmem_file_setup(char * name, loff_t size)
{
	int error;
	struct file *file;
	struct inode * inode;
	struct dentry *dentry, *root;
	struct qstr this;
	int vm_enough_memory(long pages);	/* forward decl, avoids a header dependency */

	if (size > SHMEM_MAX_BYTES)
		return ERR_PTR(-EINVAL);

	/* Charge the whole size against the overcommit limits up front */
	if (!vm_enough_memory(VM_ACCT(size)))
		return ERR_PTR(-ENOMEM);

	this.name = name;
	this.len = strlen(name);
	this.hash = 0; /* will go */
	root = shm_mnt->mnt_root;
	dentry = d_alloc(root, &this);
	if (!dentry)
		return ERR_PTR(-ENOMEM);

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto put_dentry;

	error = -ENOSPC;
	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
	if (!inode) 
		goto close_file;

	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;	/* It is unlinked */
	file->f_vfsmnt = mntget(shm_mnt);
	file->f_dentry = dentry;
	file->f_op = &shmem_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return(file);

close_file:
	put_filp(file);
put_dentry:
	dput (dentry);
	return ERR_PTR(error);	
}
1538
1539 /*
1540  * shmem_zero_setup - setup a shared anonymous mapping
1541  *
1542  * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1543  */
1544 int shmem_zero_setup(struct vm_area_struct *vma)
1545 {
1546         struct file *file;
1547         loff_t size = vma->vm_end - vma->vm_start;
1548         
1549         file = shmem_file_setup("dev/zero", size);
1550         if (IS_ERR(file))
1551                 return PTR_ERR(file);
1552
1553         if (vma->vm_file)
1554                 fput (vma->vm_file);
1555         vma->vm_file = file;
1556         vma->vm_ops = &shmem_vm_ops;
1557         return 0;
1558 }
1559
1560 EXPORT_SYMBOL(shmem_file_setup);