4 * Copyright (C) 1991, 1992 Linus Torvalds
6 * super.c contains code to handle: - mount structures
8 * - filesystem drivers list
10 * - umount system call
13 * GK 2/5/95 - Changed to support mounting the root fs via NFS
15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
17 * Added options to /proc/mounts:
18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
23 #include <linux/config.h>
24 #include <linux/slab.h>
25 #include <linux/locks.h>
26 #include <linux/smp_lock.h>
27 #include <linux/devfs_fs_kernel.h>
28 #include <linux/major.h>
29 #include <linux/acct.h>
31 #include <asm/uaccess.h>
33 #include <linux/kmod.h>
34 #define __NO_VERSION__
35 #include <linux/module.h>
/* Global list of all superblocks; entries are linked through ->s_list. */
37 LIST_HEAD(super_blocks);
/*
 * Spinlock released right after the list insertions/removals in
 * insert_super() and remove_super() and after the ->s_count adjustment
 * in deactivate_super().
 */
38 spinlock_t sb_lock = SPIN_LOCK_UNLOCKED;
41 * Handling of filesystem drivers list.
43 * Insertion into, removal from and scanning of the list are protected by file_systems_lock.
44 * During unload a module must call unregister_filesystem().
45 * We can access the fields of a list element if:
46 * 1) file_systems_lock is held or
47 * 2) we hold the reference to the module.
48 * The latter can be guaranteed by call of try_inc_mod_count(); if it
49 * returned 0 we must skip the element, otherwise we got the reference.
50 * Once the reference is obtained we can drop the spinlock.
/* Head of the list of registered filesystem types, chained through ->next. */
53 static struct file_system_type *file_systems;
/*
 * Reader/writer lock taken (read_lock or write_lock) around the walks and
 * updates of file_systems that are visible in this file.
 */
54 static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
56 /* WARNING: This can be used only if we _already_ own a reference */
/*
 * get_filesystem - take one more reference on the module behind @fs.
 * The visible statement bumps the use count of fs->owner with
 * __MOD_INC_USE_COUNT(). insert_super() calls this for each superblock
 * it attaches to a filesystem type.
 */
57 static void get_filesystem(struct file_system_type *fs)
60 __MOD_INC_USE_COUNT(fs->owner);
/*
 * put_filesystem - drop one reference on the module behind @fs.
 * The visible statement lowers the use count of fs->owner with
 * __MOD_DEC_USE_COUNT(). do_kern_mount() uses it to release the reference
 * it obtained through get_fs_type().
 */
63 static void put_filesystem(struct file_system_type *fs)
66 __MOD_DEC_USE_COUNT(fs->owner);
/*
 * find_filesystem - locate the list slot for the type called @name.
 *
 * Walks file_systems comparing each entry ->name with @name. The cursor is
 * a pointer to the link itself (&file_systems, then &entry->next), not to
 * the entry, and the callers in this file dereference the returned value.
 * No lock is taken here: register_filesystem() and get_fs_type() call this
 * with file_systems_lock already held.
 */
69 static struct file_system_type **find_filesystem(const char *name)
71 struct file_system_type **p;
72 for (p=&file_systems; *p; p=&(*p)->next)
73 if (strcmp((*p)->name,name) == 0)
79 * register_filesystem - register a new filesystem
80 * @fs: the file system structure
82 * Adds the file system passed to the list of file systems the kernel
83 * is aware of for mount and other syscalls. Returns 0 on success,
84 * or a negative errno code on an error.
86 * The &struct file_system_type that is passed is linked into the kernel
87 * structures and must not be freed until the file system has been unregistered.
/*
 * Visible steps: reset the per-type superblock list, then look @fs->name up
 * in the registry while holding file_systems_lock for writing.
 * NOTE(review): the argument checks, the handling of the lookup result and
 * the return statement are on lines not visible here.
 */
91 int register_filesystem(struct file_system_type * fs)
94 struct file_system_type ** p;
/* Start with no superblocks of this type; insert_super() adds to this list. */
100 INIT_LIST_HEAD(&fs->fs_supers);
101 write_lock(&file_systems_lock);
/* p is the slot holding an entry with this name, or the terminating slot. */
102 p = find_filesystem(fs->name);
107 write_unlock(&file_systems_lock);
112 * unregister_filesystem - unregister a file system
113 * @fs: filesystem to unregister
115 * Remove a file system that was previously successfully registered
116 * with the kernel. An error is returned if the file system is not found.
117 * Zero is returned on success.
119 * Once this function has returned the &struct file_system_type structure
120 * may be freed or reused.
/*
 * Visible steps: file_systems_lock is taken for writing once and released
 * at two separate places, i.e. there are two exit paths that each drop the
 * lock. NOTE(review): the list walk between lock and unlock is on lines not
 * visible here.
 */
123 int unregister_filesystem(struct file_system_type * fs)
/* Cursor over the registry links (same pointer-to-link type as find_filesystem()). */
125 struct file_system_type ** tmp;
127 write_lock(&file_systems_lock);
133 write_unlock(&file_systems_lock);
138 write_unlock(&file_systems_lock);
/*
 * fs_index - find the position of a filesystem type in the registry.
 * @__name: type name; passed through getname() before use and cast from a
 *          raw syscall argument by sys_sysfs().
 *
 * Walks file_systems under the read lock, counting entries from 0 in index
 * and comparing each ->name with the fetched name.
 * NOTE(review): error handling for getname(), the action on a match and the
 * return value are on lines not visible here.
 */
142 static int fs_index(const char * __name)
144 struct file_system_type * tmp;
148 name = getname(__name);
154 read_lock(&file_systems_lock);
155 for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
156 if (strcmp(tmp->name,name) == 0) {
161 read_unlock(&file_systems_lock);
/*
 * fs_name - copy the name of the @index-th registered filesystem to @buf.
 * @index: position in the registry, counted down while walking the list
 * @buf:   destination for copy_to_user(); receives the name plus its NUL
 *
 * The walk runs under the read lock. The entry is pinned with
 * try_inc_mod_count() before the lock is dropped, so the later
 * strlen()/copy_to_user() happen without the lock held.
 * NOTE(review): @index is unsigned, so the test index <= 0 is the same as
 * index == 0. If try_inc_mod_count() fails on that entry, the decrement
 * wraps and no later entry can match.
 */
166 static int fs_name(unsigned int index, char * buf)
168 struct file_system_type * tmp;
171 read_lock(&file_systems_lock);
172 for (tmp = file_systems; tmp; tmp = tmp->next, index--)
173 if (index <= 0 && try_inc_mod_count(tmp->owner))
175 read_unlock(&file_systems_lock);
179 /* OK, we got the reference, so we can safely block */
/* +1 so the terminating NUL is copied as well. */
180 len = strlen(tmp->name) + 1;
/* A nonzero copy_to_user() result becomes -EFAULT, otherwise res is 0. */
181 res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
/*
 * fs_maxindex - count the registered filesystem types.
 * Walks the whole registry under the read lock, bumping index once per
 * entry. NOTE(review): the return statement is on a line not visible here.
 */
186 static int fs_maxindex(void)
188 struct file_system_type * tmp;
191 read_lock(&file_systems_lock);
192 for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
194 read_unlock(&file_systems_lock);
199 * Whee.. Weird sysv syscall.
/*
 * sys_sysfs - syscall front end for the three registry queries above.
 * @option: selects the query (the case labels are on lines not visible here)
 * @arg1:   a name pointer for fs_index(), an index for fs_name()
 * @arg2:   destination buffer for fs_name()
 *
 * retval starts as -EINVAL and is overwritten by whichever helper runs.
 */
201 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
203 int retval = -EINVAL;
207 retval = fs_index((const char *) arg1);
211 retval = fs_name(arg1, (char *) arg2);
215 retval = fs_maxindex();
/*
 * get_filesystem_list - format the registry as text into @buf.
 *
 * Under the read lock, appends one line per entry: a first column that is
 * empty for FS_REQUIRES_DEV types and nodev otherwise, a tab, then a second
 * string (its argument is on a line not visible here, presumably the type
 * name). len accumulates the sprintf() return values.
 * The loop stops once len reaches PAGE_SIZE - 80, which leaves headroom
 * for one more line. Assumes @buf is at least PAGE_SIZE bytes; TODO confirm
 * against callers.
 */
221 int get_filesystem_list(char * buf)
224 struct file_system_type * tmp;
226 read_lock(&file_systems_lock);
228 while (tmp && len < PAGE_SIZE - 80) {
229 len += sprintf(buf+len, "%s\t%s\n",
230 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
234 read_unlock(&file_systems_lock);
/*
 * get_fs_type - look a filesystem type up by name and pin its module.
 * @name: type name to search for
 *
 * First pass: find the entry under the read lock and try to take a module
 * reference with try_inc_mod_count(). If no usable entry was found and
 * request_module(@name) returns 0, the same lookup is repeated once.
 * NOTE(review): what happens when try_inc_mod_count() fails, and the final
 * return, are on lines not visible here.
 */
238 struct file_system_type *get_fs_type(const char *name)
240 struct file_system_type *fs;
242 read_lock(&file_systems_lock);
243 fs = *(find_filesystem(name));
244 if (fs && !try_inc_mod_count(fs->owner))
246 read_unlock(&file_systems_lock);
/* Nothing usable yet: ask for the module to be loaded, then look again. */
247 if (!fs && (request_module(name) == 0)) {
248 read_lock(&file_systems_lock);
249 fs = *(find_filesystem(name));
250 if (fs && !try_inc_mod_count(fs->owner))
252 read_unlock(&file_systems_lock);
258 * alloc_super - create new superblock
260 * Allocates and initializes a new &struct super_block. alloc_super()
261 * returns a pointer to the new superblock or %NULL if allocation had failed.
/*
 * Allocates a struct super_block with kmalloc(GFP_USER), zeroes it and
 * initialises its lists, semaphores and defaults. The new superblock leaves
 * the visible code with ->s_umount held for writing and ->s_active set to 1.
 * NOTE(review): the check of the kmalloc() result and the return statement
 * are on lines not visible here.
 */
263 static struct super_block *alloc_super(void)
/* Shared all-empty operations table so ->s_op is never left NULL here. */
265 static struct super_operations empty_sops = {};
266 struct super_block *s = kmalloc(sizeof(struct super_block), GFP_USER);
268 memset(s, 0, sizeof(struct super_block));
269 INIT_LIST_HEAD(&s->s_dirty);
270 INIT_LIST_HEAD(&s->s_locked_inodes);
271 INIT_LIST_HEAD(&s->s_files);
272 INIT_LIST_HEAD(&s->s_instances);
273 init_rwsem(&s->s_umount);
274 sema_init(&s->s_lock, 1);
/* Taken immediately: callers receive the superblock write-locked. */
275 down_write(&s->s_umount);
/* One active reference, belonging to the caller. */
277 atomic_set(&s->s_active, 1);
278 sema_init(&s->s_vfs_rename_sem,1);
279 sema_init(&s->s_nfsd_free_path_sem,1);
280 sema_init(&s->s_dquot.dqio_sem, 1);
281 sema_init(&s->s_dquot.dqoff_sem, 1);
282 s->s_maxbytes = MAX_NON_LFS;
283 s->s_op = &empty_sops;
289 * destroy_super - frees a superblock
290 * @s: superblock to free
292 * Frees a superblock.
/*
 * @s: superblock to free.
 * NOTE(review): only the signature is visible here; the body is on lines
 * not visible in this file.
 */
294 static inline void destroy_super(struct super_block *s)
299 /* Superblock refcounting */
302 * deactivate_super - turn an active reference into a temporary one
303 * @s: superblock to deactivate
305 * Turns an active reference into a temporary one. Returns 0 if there are
306 * other active references, 1 if we had deactivated the last one.
/*
 * Drops one active reference (->s_active). When atomic_dec_and_lock()
 * reports that this was the last one, it returns with sb_lock held. The
 * code then removes S_BIAS-1 from ->s_count and unlocks.
 * NOTE(review): both return statements are on lines not visible here.
 */
308 static inline int deactivate_super(struct super_block *s)
310 if (!atomic_dec_and_lock(&s->s_active, &sb_lock))
/* Subtracting S_BIAS-1 replaces the active bias with a single reference. */
312 s->s_count -= S_BIAS-1;
313 spin_unlock(&sb_lock);
318 * put_super - drop a temporary reference to superblock
319 * @s: superblock in question
321 * Drops a temporary reference; frees the superblock if there are no references left.
/*
 * @s: superblock whose temporary reference is being dropped.
 * NOTE(review): only the final release of sb_lock is visible; the matching
 * lock and the reference drop itself are on lines not visible here.
 */
324 static inline void put_super(struct super_block *s)
329 spin_unlock(&sb_lock);
333 * grab_super - acquire an active reference
334 * @s: reference we are trying to make active
336 * Tries to acquire an active reference. grab_super() is used when we
337 * had just found a superblock in super_blocks or fs_type->fs_supers
338 * and want to turn it into a full-blown active reference. grab_super()
339 * is called with sb_lock held and drops it. Returns 1 in case of
340 * success, 0 if we had failed (superblock contents were already dead or
341 * dying when grab_super() had been called).
/*
 * Entered with sb_lock held. The lock is released before waiting for
 * ->s_umount. The ->s_count check and the ->s_active increment are followed
 * by spin_unlock(&sb_lock), so they presumably run with sb_lock re-taken on
 * a line not visible here.
 * NOTE(review): ->s_umount is released only after the second unlock, so the
 * success path presumably returns with ->s_umount still write-locked.
 * Confirm against the full source.
 */
343 static int grab_super(struct super_block *s)
346 spin_unlock(&sb_lock);
347 down_write(&s->s_umount);
/* More than S_BIAS means the active bias is still in place. */
350 if (s->s_count > S_BIAS) {
351 atomic_inc(&s->s_active);
353 spin_unlock(&sb_lock);
356 spin_unlock(&sb_lock);
358 up_write(&s->s_umount);
364 * insert_super - put superblock on the lists
365 * @s: superblock in question
366 * @type: filesystem type it will belong to
368 * Associates superblock with fs type and puts it on per-type and global
369 * superblocks' lists. Should be called with sb_lock held; drops it.
/*
 * Links @s into the global and per-type lists, releases sb_lock (held on
 * entry), then takes a module reference for @type on behalf of @s.
 */
371 static void insert_super(struct super_block *s, struct file_system_type *type)
/* Adding after super_blocks.prev appends @s at the tail of the global list. */
374 list_add(&s->s_list, super_blocks.prev);
/* Per-type list: @s goes right after the head of type->fs_supers. */
375 list_add(&s->s_instances, &type->fs_supers);
376 spin_unlock(&sb_lock);
377 get_filesystem(type);
/* Forward declaration; the definition appears further down in this file. */
380 static void put_anon_dev(kdev_t dev);
383 * remove_super - makes superblock unreachable
384 * @s: superblock in question
386 * Removes superblock from the lists, unlocks it, drops the reference
387 * and releases the hosting device. @s should have no active
388 * references by that time and after remove_super() it's essentially
389 * in rundown mode - all remaining references are temporary, no new
390 * references of any sort are going to appear and all holders of
391 * temporary ones will eventually drop them. At that point superblock
392 * itself will be destroyed; all its contents are already gone.
/*
 * Visible steps: snapshot the device, block device and type of @s, unlink
 * @s from both lists, release sb_lock and ->s_umount, and release the block
 * device with blkdev_put().
 * NOTE(review): taking sb_lock, the condition guarding blkdev_put() and the
 * remaining cleanup are on lines not visible here.
 */
394 static void remove_super(struct super_block *s)
/* Copied up front so they can still be used after @s is unlinked. */
396 kdev_t dev = s->s_dev;
397 struct block_device *bdev = s->s_bdev;
398 struct file_system_type *fs = s->s_type;
401 list_del(&s->s_list);
402 list_del(&s->s_instances);
403 spin_unlock(&sb_lock);
404 up_write(&s->s_umount);
408 blkdev_put(bdev, BDEV_FS);
/*
 * Prototypes only. alloc_vfsmnt() is called from do_kern_mount(). No
 * definition of either function is visible in this portion of the file.
 */
413 struct vfsmount *alloc_vfsmnt(char *name);
414 void free_vfsmnt(struct vfsmount *mnt);
/*
 * find_super - scan super_blocks for the superblock whose ->s_dev is @dev.
 * No locking is visible in this function.
 * NOTE(review): the action on a match and the return statements are on
 * lines not visible here.
 */
416 static inline struct super_block * find_super(kdev_t dev)
420 list_for_each(p, &super_blocks) {
421 struct super_block * s = sb_entry(p);
422 if (s->s_dev == dev) {
/*
 * drop_super - release the read hold on @sb->s_umount.
 * Counterpart of the down_read() visible in get_super().
 * NOTE(review): any further statements are on lines not visible here.
 */
430 void drop_super(struct super_block *sb)
432 up_read(&sb->s_umount);
/*
 * write_super - call the filesystem's ->write_super() when it is needed.
 * The method is invoked only if @sb has a root dentry, is marked dirty
 * (->s_dirt) and actually provides the method.
 * NOTE(review): statements before and after these checks are on lines not
 * visible here.
 */
436 static inline void write_super(struct super_block *sb)
439 if (sb->s_root && sb->s_dirt)
440 if (sb->s_op && sb->s_op->write_super)
441 sb->s_op->write_super(sb);
446 * Note: check the dirty flag before waiting, so we don't
447 * hold up the sync while mounting a device. (The newly
448 * mounted device won't need syncing.)
/*
 * sync_supers - write back superblocks.
 * @dev:  device selector (its use is on lines not visible here)
 * @wait: when nonzero, ->sync_fs() is called where the filesystem has it
 *
 * Two sections are visible. The first works on a single sb and calls
 * ->sync_fs() when @wait is set. The second walks super_blocks from the
 * head: for each entry it drops sb_lock, takes ->s_umount for reading,
 * optionally calls ->sync_fs(), then steps to the next entry.
 * NOTE(review): the dirty checks, the write_super() calls and the places
 * where sb_lock is taken are on lines not visible here.
 */
450 void sync_supers(kdev_t dev, int wait)
452 struct super_block * sb;
459 if (wait && sb->s_op && sb->s_op->sync_fs)
460 sb->s_op->sync_fs(sb);
/* Start at the first list entry; the loop ends when the walk is back at the head. */
467 sb = sb_entry(super_blocks.next);
468 while (sb != sb_entry(&super_blocks))
471 spin_unlock(&sb_lock);
472 down_read(&sb->s_umount);
/* Here ->s_root is checked as well before calling into the filesystem. */
474 if (wait && sb->s_root && sb->s_op && sb->s_op->sync_fs)
475 sb->s_op->sync_fs(sb);
479 sb = sb_entry(sb->s_list.next);
480 spin_unlock(&sb_lock);
484 * get_super - get the superblock of a device
485 * @dev: device to get the superblock for
487 * Scans the superblock list and finds the superblock of the file system
488 * mounted on the device given. %NULL is returned if no match is found.
/*
 * Visible steps: on one path sb_lock is released and ->s_umount is then
 * taken for reading; a second spin_unlock() marks another exit path.
 * drop_super() provides the matching up_read().
 * NOTE(review): the lookup itself and the return statements are on lines
 * not visible here.
 */
491 struct super_block * get_super(kdev_t dev)
493 struct super_block * s;
501 spin_unlock(&sb_lock);
502 down_read(&s->s_umount);
508 spin_unlock(&sb_lock);
/*
 * sys_ustat - report free blocks and free inodes for device @dev.
 * @dev:  device number, converted with to_kdev_t() for get_super()
 * @ubuf: user buffer that receives a struct ustat
 *
 * The result struct is zeroed first, so only f_tfree and f_tinode carry
 * data from vfs_statfs(). A failed copy_to_user() yields -EFAULT.
 * NOTE(review): the error checks and the release of the superblock are on
 * lines not visible here.
 */
512 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
514 struct super_block *s;
519 s = get_super(to_kdev_t(dev));
522 err = vfs_statfs(s, &sbuf);
527 memset(&tmp,0,sizeof(struct ustat));
528 tmp.f_tfree = sbuf.f_bfree;
529 tmp.f_tinode = sbuf.f_ffree;
531 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
537 * do_remount_sb - asks filesystem to change mount options.
538 * @sb: superblock in question
539 * @flags: numeric part of options
540 * @data: the rest of options
542 * Alters the mount options of a mounted file system.
/*
 * Visible sequence: check for a read-write request on a read-only device,
 * close accounting when going read-only, shrink the dcache for @sb, check
 * that a switch to read-only is possible, call the filesystem's
 * ->remount_fs() if present, and finally update ->s_flags.
 * NOTE(review): the actions taken when the checks fail and the return
 * statements are on lines not visible here.
 */
544 int do_remount_sb(struct super_block *sb, int flags, void *data)
/* Read-write requested but the underlying device reports read-only. */
548 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
550 /*flags |= MS_RDONLY;*/
551 if (flags & MS_RDONLY)
552 acct_auto_close(sb->s_dev);
553 shrink_dcache_sb(sb);
555 /* If we are remounting RDONLY, make sure there are no rw files open */
556 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
557 if (!fs_may_remount_ro(sb))
559 if (sb->s_op && sb->s_op->remount_fs) {
/* flags is passed by address, so the filesystem may adjust it. */
561 retval = sb->s_op->remount_fs(sb, &flags, data);
/* Only the bits inside MS_RMT_MASK are taken from flags; the rest are kept. */
566 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
571 * Unnamed block devices are dummy devices used by virtual
572 * filesystems which don't use real block-devices. -- jrs
/* Number of anonymous device slots tracked by the bitmap below. */
575 enum {Max_anon = 256};
/*
 * One bit per slot: set in get_anon_super(), cleared in put_anon_dev().
 * Sized as Max_anon bits packed into unsigned longs.
 */
576 static unsigned long unnamed_dev_in_use[Max_anon/(8*sizeof(unsigned long))];
577 static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
580 * put_anon_dev - release anonymous device number.
581 * @dev: device in question
/*
 * Marks the slot MINOR(@dev) as free again by clearing its bit in
 * unnamed_dev_in_use, under unnamed_dev_lock.
 */
583 static void put_anon_dev(kdev_t dev)
585 spin_lock(&unnamed_dev_lock);
586 clear_bit(MINOR(dev), unnamed_dev_in_use);
587 spin_unlock(&unnamed_dev_lock);
591 * get_anon_super - allocate a superblock for non-device fs
592 * @type: filesystem type
593 * @compare: check if existing superblock is what we want
594 * @data: argument for @compare.
596 * get_anon_super is a helper for non-blockdevice filesystems.
597 * It either finds and returns one of the superblocks of given type
598 * (if it can find one that would satisfy caller) or creates a new
599 * one. In either case we return an active reference to superblock
600 * with ->s_umount locked. If superblock is new it gets a new
601 * anonymous device allocated for it and is inserted into lists -
602 * other initialization is left to caller.
604 * Rather than duplicating all that logic every time when
605 * we want something that doesn't fit "nodev" and "single" we pull
606 * the relevant code into common helper and let get_sb_...() call it.
609 * NB: get_sb_...() is going to become an fs type method, with
610 * current ->read_super() becoming a callback used by common instances.
/*
 * Visible flow: allocate a candidate superblock, optionally search the
 * existing superblocks of @type with @compare, otherwise claim a free slot
 * in the anonymous-device bitmap and insert the new superblock.
 * Failure values visible here are ERR_PTR(-ENOMEM) and ERR_PTR(-EMFILE).
 * NOTE(review): taking sb_lock, disposing of the unused candidate when an
 * old superblock is reused, storing the device number and the final return
 * are on lines not visible here.
 */
612 struct super_block *get_anon_super(struct file_system_type *type,
613 int (*compare)(struct super_block *,void *), void *data)
/* Allocated before any search, so a spare is ready if nothing matches. */
615 struct super_block *s = alloc_super();
620 return ERR_PTR(-ENOMEM);
/* The search is skipped entirely when no compare callback is given. */
624 if (compare) list_for_each(p, &type->fs_supers) {
625 struct super_block *old;
626 old = list_entry(p, struct super_block, s_instances);
/* A zero result from compare() means this entry is not wanted. */
627 if (!compare(old, data))
/* grab_super() drops sb_lock and may fail if old is dead or dying. */
629 if (!grab_super(old))
635 spin_lock(&unnamed_dev_lock);
636 dev = find_first_zero_bit(unnamed_dev_in_use, Max_anon);
/* A result equal to Max_anon means no free slot: release both locks, fail. */
637 if (dev == Max_anon) {
638 spin_unlock(&unnamed_dev_lock);
639 spin_unlock(&sb_lock);
641 return ERR_PTR(-EMFILE);
643 set_bit(dev, unnamed_dev_in_use);
644 spin_unlock(&unnamed_dev_lock);
/* insert_super() also releases sb_lock. */
647 insert_super(s, type);
/*
 * get_sb_bdev - obtain a superblock for a filesystem on a block device.
 * @fs_type:  filesystem type being mounted
 * @flags:    mount flags (MS_RDONLY and MS_VERBOSE are examined here)
 * @dev_name: path of the block device node
 * @data:     opaque options handed to ->read_super()
 *
 * Visible stages: resolve @dev_name to an inode, check that it is a block
 * device on a mount without MNT_NODEV, open the device with blkdev_get(),
 * look for an existing superblock on the same device, otherwise insert the
 * new one and call ->read_super(). Failures are returned as ERR_PTR().
 * NOTE(review): many lines (error codes, gotos, labels, allocation of the
 * new superblock) are not visible here, so the comments below describe
 * only the visible statements.
 */
651 static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
652 int flags, char *dev_name, void * data)
655 struct block_device *bdev;
656 struct block_device_operations *bdops;
658 struct super_block * s;
663 mode_t mode = FMODE_READ; /* we always need it ;-) */
665 /* What device it is? */
/* A missing or empty device name is rejected outright. */
666 if (!dev_name || !*dev_name)
667 return ERR_PTR(-EINVAL);
668 error = path_lookup(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
670 return ERR_PTR(error);
671 inode = nd.dentry->d_inode;
673 if (!S_ISBLK(inode->i_mode))
676 if (nd.mnt->mnt_flags & MNT_NODEV)
679 bdev = inode->i_bdev;
680 de = devfs_get_handle_from_inode (inode);
681 bdops = devfs_get_ops (de); /* Increments module use count */
/* When devfs supplies operations they replace bdev->bd_op. */
682 if (bdops) bdev->bd_op = bdops;
683 /* Done with lookups, semaphore down */
684 dev = to_kdev_t(bdev->bd_dev);
685 if (!(flags & MS_RDONLY))
687 error = blkdev_get(bdev, mode, 0, BDEV_FS);
688 devfs_put_ops (de); /* Decrement module use count now we're safe */
691 check_disk_change(dev);
693 if (!(flags & MS_RDONLY) && is_read_only(dev))
/* Look for a superblock that already sits on this device. */
705 list_for_each(p, &super_blocks) {
706 struct super_block *old = sb_entry(p);
707 if (old->s_dev != dev)
/* Same device but a different type or a different read-only state. */
709 if (old->s_type != fs_type ||
710 ((flags ^ old->s_flags) & MS_RDONLY)) {
711 spin_unlock(&sb_lock);
715 if (!grab_super(old))
718 blkdev_put(bdev, BDEV_FS);
725 insert_super(s, fs_type);
/* The third argument is 1 only when MS_VERBOSE is set in flags. */
726 if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0))
728 s->s_flags |= MS_ACTIVE;
738 blkdev_put(bdev, BDEV_FS);
741 return ERR_PTR(error);
/*
 * get_sb_nodev - obtain a superblock for a filesystem without a device.
 * Always asks get_anon_super() for a fresh superblock (no compare callback),
 * then lets the filesystem fill it in through ->read_super(). A failed
 * ->read_super() leads to ERR_PTR(-EINVAL); on success MS_ACTIVE is set.
 * @dev_name is not used in the visible lines.
 * NOTE(review): the check of the get_anon_super() result, the cleanup before
 * the error return and the final return are on lines not visible here.
 */
744 static struct super_block *get_sb_nodev(struct file_system_type *fs_type,
745 int flags, char *dev_name, void *data)
747 struct super_block *s = get_anon_super(fs_type, NULL, NULL);
753 if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) {
756 return ERR_PTR(-EINVAL);
758 s->s_flags |= MS_ACTIVE;
/*
 * Compare callback that get_sb_single() passes to get_anon_super().
 * NOTE(review): only the signature is visible here; the body is on lines
 * not visible in this file.
 */
762 static int compare_single(struct super_block *s, void *p)
/*
 * get_sb_single - obtain the superblock of a single-instance filesystem.
 * Calls get_anon_super() with compare_single, so an existing superblock of
 * @fs_type may be returned instead of a new one. The visible lines show
 * both ->read_super() with MS_ACTIVE being set afterwards, and
 * do_remount_sb() applying @flags and @data.
 * NOTE(review): the conditions choosing between those two paths, the
 * cleanup before the error return and the final return are on lines not
 * visible here.
 */
767 static struct super_block *get_sb_single(struct file_system_type *fs_type,
768 int flags, char *dev_name, void *data)
770 struct super_block *s = get_anon_super(fs_type, compare_single, NULL);
776 if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) {
779 return ERR_PTR(-EINVAL);
781 s->s_flags |= MS_ACTIVE;
783 do_remount_sb(s, flags, data);
/*
 * do_kern_mount - build a vfsmount for a filesystem type given by name.
 * @fstype: type name, resolved with get_fs_type()
 * @flags:  mount flags handed to the get_sb_*() helper
 * @name:   passed to alloc_vfsmnt() and on to the helper as the device name
 * @data:   opaque options handed to the helper
 *
 * The helper is chosen from type->fs_flags: FS_REQUIRES_DEV selects
 * get_sb_bdev(), FS_SINGLE selects get_sb_single(), anything else
 * get_sb_nodev(). Errors are returned as ERR_PTR values cast to the
 * vfsmount pointer type.
 * NOTE(review): the return type, the failure checks, the labels and the
 * success return are on lines not visible here.
 */
788 do_kern_mount(const char *fstype, int flags, char *name, void *data)
790 struct file_system_type *type = get_fs_type(fstype);
/* Preset error; it is what the final return yields if sb is never reassigned. */
791 struct super_block *sb = ERR_PTR(-ENOMEM);
792 struct vfsmount *mnt;
795 return ERR_PTR(-ENODEV);
797 mnt = alloc_vfsmnt(name);
800 if (type->fs_flags & FS_REQUIRES_DEV)
801 sb = get_sb_bdev(type, flags, name, data);
802 else if (type->fs_flags & FS_SINGLE)
803 sb = get_sb_single(type, flags, name, data);
805 sb = get_sb_nodev(type, flags, name, data);
808 if (type->fs_flags & FS_NOMOUNT)
809 sb->s_flags |= MS_NOUSER;
/* The mount root holds its own dentry reference taken with dget(). */
811 mnt->mnt_root = dget(sb->s_root);
812 mnt->mnt_mountpoint = sb->s_root;
/* The new mount is its own parent at this point. */
813 mnt->mnt_parent = mnt;
/* Releases ->s_umount, which alloc_super() took for writing. */
814 up_write(&sb->s_umount);
/* Drops the module reference taken by get_fs_type(). */
815 put_filesystem(type);
820 put_filesystem(type);
821 return (struct vfsmount *)sb;
/*
 * kill_super - drop an active reference and tear the superblock down.
 * @sb: superblock being released
 *
 * Visible sequence once deactivate_super() has been called: take
 * ->s_umount for writing, shrink the dcache below the root, clear
 * MS_ACTIVE, evict inodes, write a dirty superblock out through
 * ->write_super(), then evict inodes again and complain if any remain.
 * NOTE(review): the early return, the FS_LITTER action, the ->put_super()
 * call and the final removal are on lines not visible here.
 */
824 void kill_super(struct super_block *sb)
/* Read up front, before the teardown below starts changing @sb. */
826 struct dentry *root = sb->s_root;
827 struct file_system_type *fs = sb->s_type;
828 struct super_operations *sop = sb->s_op;
/* Zero from deactivate_super() means other active references remain. */
830 if (!deactivate_super(sb))
833 down_write(&sb->s_umount);
835 /* Need to clean after the sucker */
836 if (fs->fs_flags & FS_LITTER)
838 shrink_dcache_parent(root);
843 sb->s_flags &= ~MS_ACTIVE;
844 invalidate_inodes(sb); /* bad name - it should be evict_inodes() */
846 if (sop->write_super && sb->s_dirt)
847 sop->write_super(sb);
852 /* Forget any remaining inodes */
/* A nonzero result here triggers the error message below. */
853 if (invalidate_inodes(sb)) {
854 printk(KERN_ERR "VFS: Busy inodes after unmount. "
855 "Self-destruct in 5 seconds. Have a nice day...\n");
/*
 * kern_mount - mount @type with default settings.
 * Thin wrapper around do_kern_mount(): flags are 0, no option data is
 * passed, and the type name doubles as the mount name (cast to drop const).
 */
863 struct vfsmount *kern_mount(struct file_system_type *type)
865 return do_kern_mount(type->name, 0, (char *)type->name, NULL);