Merge remote-tracking branch 'net-next/master'
[linux] / kernel / cgroup / cgroup.c
index 9f61760..19da0ab 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/proc_ns.h>
 #include <linux/nsproxy.h>
 #include <linux/file.h>
+#include <linux/fs_parser.h>
 #include <linux/sched/cputime.h>
 #include <linux/psi.h>
 #include <net/sock.h>
@@ -1772,26 +1773,37 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
        return len;
 }
 
-static int parse_cgroup_root_flags(char *data, unsigned int *root_flags)
-{
-       char *token;
+enum cgroup2_param {
+       Opt_nsdelegate,
+       nr__cgroup2_params
+};
 
-       *root_flags = 0;
+static const struct fs_parameter_spec cgroup2_param_specs[] = {
+       fsparam_flag  ("nsdelegate",            Opt_nsdelegate),
+       {}
+};
 
-       if (!data || *data == '\0')
-               return 0;
+static const struct fs_parameter_description cgroup2_fs_parameters = {
+       .name           = "cgroup2",
+       .specs          = cgroup2_param_specs,
+};
 
-       while ((token = strsep(&data, ",")) != NULL) {
-               if (!strcmp(token, "nsdelegate")) {
-                       *root_flags |= CGRP_ROOT_NS_DELEGATE;
-                       continue;
-               }
+static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+       struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
+       struct fs_parse_result result;
+       int opt;
 
-               pr_err("cgroup2: unknown option \"%s\"\n", token);
-               return -EINVAL;
-       }
+       opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
 
-       return 0;
+       switch (opt) {
+       case Opt_nsdelegate:
+               ctx->flags |= CGRP_ROOT_NS_DELEGATE;
+               return 0;
+       }
+       return -EINVAL;
 }
 
 static void apply_cgroup_root_flags(unsigned int root_flags)
@@ -1811,16 +1823,11 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
        return 0;
 }
 
-static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
+static int cgroup_reconfigure(struct fs_context *fc)
 {
-       unsigned int root_flags;
-       int ret;
-
-       ret = parse_cgroup_root_flags(data, &root_flags);
-       if (ret)
-               return ret;
+       struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
 
-       apply_cgroup_root_flags(root_flags);
+       apply_cgroup_root_flags(ctx->flags);
        return 0;
 }
 
@@ -1908,8 +1915,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
 }
 
-void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
+void init_cgroup_root(struct cgroup_fs_context *ctx)
 {
+       struct cgroup_root *root = ctx->root;
        struct cgroup *cgrp = &root->cgrp;
 
        INIT_LIST_HEAD(&root->root_list);
@@ -1918,16 +1926,16 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
        init_cgroup_housekeeping(cgrp);
        idr_init(&root->cgroup_idr);
 
-       root->flags = opts->flags;
-       if (opts->release_agent)
-               strscpy(root->release_agent_path, opts->release_agent, PATH_MAX);
-       if (opts->name)
-               strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
-       if (opts->cpuset_clone_children)
+       root->flags = ctx->flags;
+       if (ctx->release_agent)
+               strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
+       if (ctx->name)
+               strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
+       if (ctx->cpuset_clone_children)
                set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
-int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 {
        LIST_HEAD(tmp_links);
        struct cgroup *root_cgrp = &root->cgrp;
@@ -1944,7 +1952,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
        root_cgrp->ancestor_ids[0] = ret;
 
        ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
-                             ref_flags, GFP_KERNEL);
+                             0, GFP_KERNEL);
        if (ret)
                goto out;
 
@@ -2028,57 +2036,104 @@ out:
        return ret;
 }
 
-struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
-                              struct cgroup_root *root, unsigned long magic,
-                              struct cgroup_namespace *ns)
+int cgroup_do_get_tree(struct fs_context *fc)
 {
-       struct dentry *dentry;
-       bool new_sb;
+       struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
+       int ret;
 
-       dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
+       ctx->kfc.root = ctx->root->kf_root;
+       if (fc->fs_type == &cgroup2_fs_type)
+               ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
+       else
+               ctx->kfc.magic = CGROUP_SUPER_MAGIC;
+       ret = kernfs_get_tree(fc);
 
        /*
         * In non-init cgroup namespace, instead of root cgroup's dentry,
         * we return the dentry corresponding to the cgroupns->root_cgrp.
         */
-       if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
+       if (!ret && ctx->ns != &init_cgroup_ns) {
                struct dentry *nsdentry;
+               struct super_block *sb = fc->root->d_sb;
                struct cgroup *cgrp;
 
                mutex_lock(&cgroup_mutex);
                spin_lock_irq(&css_set_lock);
 
-               cgrp = cset_cgroup_from_root(ns->root_cset, root);
+               cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
 
                spin_unlock_irq(&css_set_lock);
                mutex_unlock(&cgroup_mutex);
 
-               nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
-               dput(dentry);
-               dentry = nsdentry;
+               nsdentry = kernfs_node_dentry(cgrp->kn, sb);
+               dput(fc->root);
+               fc->root = IS_ERR(nsdentry) ? NULL : nsdentry; /* no ERR_PTR */
+               if (IS_ERR(nsdentry)) {
+                       ret = PTR_ERR(nsdentry);
+                       deactivate_locked_super(sb);
+               }
        }
 
-       if (IS_ERR(dentry) || !new_sb)
-               cgroup_put(&root->cgrp);
+       if (!ctx->kfc.new_sb_created)
+               cgroup_put(&ctx->root->cgrp);
 
-       return dentry;
+       return ret;
 }
 
-static struct dentry *cgroup_mount(struct file_system_type *fs_type,
-                        int flags, const char *unused_dev_name,
-                        void *data)
+/*
+ * Destroy a cgroup filesystem context.
+ */
+static void cgroup_fs_context_free(struct fs_context *fc)
 {
-       struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
-       struct dentry *dentry;
+       struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
+
+       kfree(ctx->name);
+       kfree(ctx->release_agent);
+       put_cgroup_ns(ctx->ns);
+       kernfs_free_fs_context(fc);
+       kfree(ctx);
+}
+
+static int cgroup_get_tree(struct fs_context *fc)
+{
+       struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
        int ret;
 
-       get_cgroup_ns(ns);
+       cgrp_dfl_visible = true;
+       cgroup_get_live(&cgrp_dfl_root.cgrp);
+       ctx->root = &cgrp_dfl_root;
 
-       /* Check if the caller has permission to mount. */
-       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
-               put_cgroup_ns(ns);
-               return ERR_PTR(-EPERM);
-       }
+       ret = cgroup_do_get_tree(fc);
+       if (!ret)
+               apply_cgroup_root_flags(ctx->flags);
+       return ret;
+}
+
+static const struct fs_context_operations cgroup_fs_context_ops = {
+       .free           = cgroup_fs_context_free,
+       .parse_param    = cgroup2_parse_param,
+       .get_tree       = cgroup_get_tree,
+       .reconfigure    = cgroup_reconfigure,
+};
+
+static const struct fs_context_operations cgroup1_fs_context_ops = {
+       .free           = cgroup_fs_context_free,
+       .parse_param    = cgroup1_parse_param,
+       .get_tree       = cgroup1_get_tree,
+       .reconfigure    = cgroup1_reconfigure,
+};
+
+/*
+ * Initialise the cgroup filesystem creation/reconfiguration context.  Notably,
+ * we select the namespace we're going to use.
+ */
+static int cgroup_init_fs_context(struct fs_context *fc)
+{
+       struct cgroup_fs_context *ctx;
+
+       ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
 
        /*
         * The first time anyone tries to mount a cgroup, enable the list
@@ -2087,29 +2142,18 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
        if (!use_task_css_set_links)
                cgroup_enable_task_cg_lists();
 
-       if (fs_type == &cgroup2_fs_type) {
-               unsigned int root_flags;
-
-               ret = parse_cgroup_root_flags(data, &root_flags);
-               if (ret) {
-                       put_cgroup_ns(ns);
-                       return ERR_PTR(ret);
-               }
-
-               cgrp_dfl_visible = true;
-               cgroup_get_live(&cgrp_dfl_root.cgrp);
-
-               dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
-                                        CGROUP2_SUPER_MAGIC, ns);
-               if (!IS_ERR(dentry))
-                       apply_cgroup_root_flags(root_flags);
-       } else {
-               dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
-                                      CGROUP_SUPER_MAGIC, ns);
-       }
-
-       put_cgroup_ns(ns);
-       return dentry;
+       ctx->ns = current->nsproxy->cgroup_ns;
+       get_cgroup_ns(ctx->ns);
+       fc->fs_private = &ctx->kfc;
+       if (fc->fs_type == &cgroup2_fs_type)
+               fc->ops = &cgroup_fs_context_ops;
+       else
+               fc->ops = &cgroup1_fs_context_ops;
+       if (fc->user_ns)
+               put_user_ns(fc->user_ns);
+       fc->user_ns = get_user_ns(ctx->ns->user_ns);
+       fc->global = true;
+       return 0;
 }
 
 static void cgroup_kill_sb(struct super_block *sb)
@@ -2118,33 +2162,33 @@ static void cgroup_kill_sb(struct super_block *sb)
        struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 
        /*
-        * If @root doesn't have any mounts or children, start killing it.
+        * If @root doesn't have any children, start killing it.
         * This prevents new mounts by disabling percpu_ref_tryget_live().
-        * cgroup_mount() may wait for @root's release.
+        * A mount attempt may wait for @root's release.
         *
         * And don't kill the default root.
         */
-       if (!list_empty(&root->cgrp.self.children) ||
-           root == &cgrp_dfl_root)
-               cgroup_put(&root->cgrp);
-       else
+       if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
+           !percpu_ref_is_dying(&root->cgrp.self.refcnt))
                percpu_ref_kill(&root->cgrp.self.refcnt);
-
+       cgroup_put(&root->cgrp);
        kernfs_kill_sb(sb);
 }
 
 struct file_system_type cgroup_fs_type = {
-       .name = "cgroup",
-       .mount = cgroup_mount,
-       .kill_sb = cgroup_kill_sb,
-       .fs_flags = FS_USERNS_MOUNT,
+       .name                   = "cgroup",
+       .init_fs_context        = cgroup_init_fs_context,
+       .parameters             = &cgroup1_fs_parameters,
+       .kill_sb                = cgroup_kill_sb,
+       .fs_flags               = FS_USERNS_MOUNT,
 };
 
 static struct file_system_type cgroup2_fs_type = {
-       .name = "cgroup2",
-       .mount = cgroup_mount,
-       .kill_sb = cgroup_kill_sb,
-       .fs_flags = FS_USERNS_MOUNT,
+       .name                   = "cgroup2",
+       .init_fs_context        = cgroup_init_fs_context,
+       .parameters             = &cgroup2_fs_parameters,
+       .kill_sb                = cgroup_kill_sb,
+       .fs_flags               = FS_USERNS_MOUNT,
 };
 
 int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
@@ -5267,7 +5311,6 @@ int cgroup_rmdir(struct kernfs_node *kn)
 
 static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
        .show_options           = cgroup_show_options,
-       .remount_fs             = cgroup_remount,
        .mkdir                  = cgroup_mkdir,
        .rmdir                  = cgroup_rmdir,
        .show_path              = cgroup_show_path,
@@ -5334,11 +5377,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
  */
 int __init cgroup_init_early(void)
 {
-       static struct cgroup_sb_opts __initdata opts;
+       static struct cgroup_fs_context __initdata ctx;
        struct cgroup_subsys *ss;
        int i;
 
-       init_cgroup_root(&cgrp_dfl_root, &opts);
+       ctx.root = &cgrp_dfl_root;
+       init_cgroup_root(&ctx);
        cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
 
        RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
@@ -5399,7 +5443,7 @@ int __init cgroup_init(void)
        hash_add(css_set_table, &init_css_set.hlist,
                 css_set_hash(init_css_set.subsys));
 
-       BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
+       BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
 
        mutex_unlock(&cgroup_mutex);