[linux-yocto] [PATCH] [RFC] cgroups: Resource controller for open files
Bruce Ashfield
bruce.ashfield at windriver.com
Tue Jul 29 20:06:50 PDT 2014
On 2014-07-29, 1:58 AM, zhe.he at windriver.com wrote:
> From: He Zhe <zhe.he at windriver.com>
How did you extract this patch from the mailing list? The
From: field should not change, since you aren't the
original author of the patch.
>
> Add a resource controller for limiting the number of open
> file handles. This allows us to catch misbehaving processes
> and return EMFILE instead of ENOMEM for kernel memory limits.
>
> Signed-off-by: Binder Makin <merimus at google.com>
>
> Port from lkml: https://lkml.org/lkml/2014/7/2/640
> Correct wrong handling in fs/file.c:do_dup2
This should be in a separate patch.
So let's see a resend of this, with patch 1/2 being the upstream
change (with the author information intact) and patch 2/2 being any
fixes you made to the original.
Bruce
>
> Signed-off-by: He Zhe <zhe.he at windriver.com>
> ---
> fs/Makefile | 1 +
> fs/file.c | 46 ++++++++
> fs/filescontrol.c | 249 +++++++++++++++++++++++++++++++++++++++++
> include/linux/cgroup_subsys.h | 5 +
> include/linux/fdtable.h | 3 +
> include/linux/filescontrol.h | 32 ++++++
> init/Kconfig | 7 ++
> 7 files changed, 343 insertions(+)
> create mode 100644 fs/filescontrol.c
> create mode 100644 include/linux/filescontrol.h
>
> diff --git a/fs/Makefile b/fs/Makefile
> index ebfe2ee..18eaee0 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -48,6 +48,7 @@ obj-$(CONFIG_COREDUMP) += coredump.o
> obj-$(CONFIG_SYSCTL) += drop_caches.o
>
> obj-$(CONFIG_FHANDLE) += fhandle.o
> +obj-$(CONFIG_CGROUP_FILES) += filescontrol.o
>
> obj-y += quota/
>
> diff --git a/fs/file.c b/fs/file.c
> index eb56a13..e615dc9 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -22,6 +22,7 @@
> #include <linux/spinlock.h>
> #include <linux/rcupdate.h>
> #include <linux/workqueue.h>
> +#include <linux/filescontrol.h>
>
> int sysctl_nr_open __read_mostly = 1024*1024;
> int sysctl_nr_open_min = BITS_PER_LONG;
> @@ -264,6 +265,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
> new_fdt->close_on_exec = newf->close_on_exec_init;
> new_fdt->open_fds = newf->open_fds_init;
> new_fdt->fd = &newf->fd_array[0];
> +#ifdef CONFIG_CGROUP_FILES
> + files_cgroup_assign(newf);
> +#endif
>
> spin_lock(&oldf->file_lock);
> old_fdt = files_fdtable(oldf);
> @@ -340,9 +344,28 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
>
> rcu_assign_pointer(newf->fdt, new_fdt);
>
> +#ifdef CONFIG_CGROUP_FILES
> + if (!files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)))
> + return newf;
> +
> +/* could not get enough FD resources. Need to clean up. */
> + new_fds = new_fdt->fd;
> + for (i = open_files; i != 0; i--) {
> + struct file *f = *new_fds++;
> + if (f)
> + fput(f);
> + }
> + if (new_fdt != &newf->fdtab)
> + __free_fdtable(new_fdt);
> + *errorp = -EMFILE;
> +#else
> return newf;
> +#endif
>
> out_release:
> +#ifdef CONFIG_CGROUP_FILES
> + files_cgroup_remove(newf);
> +#endif
> kmem_cache_free(files_cachep, newf);
> out:
> return NULL;
> @@ -368,6 +391,9 @@ static struct fdtable *close_files(struct files_struct * files)
> if (set & 1) {
> struct file * file = xchg(&fdt->fd[i], NULL);
> if (file) {
> +#ifdef CONFIG_CGROUP_FILES
> + files_cgroup_unalloc_fd(files, 1);
> +#endif
> filp_close(file, files);
> cond_resched();
> }
> @@ -486,6 +512,13 @@ repeat:
> if (error)
> goto repeat;
>
> +#ifdef CONFIG_CGROUP_FILES
> + if (files_cgroup_alloc_fd(files, 1)) {
> + error = -EMFILE;
> + goto out;
> + }
> +#endif
> +
> if (start <= files->next_fd)
> files->next_fd = fd + 1;
>
> @@ -522,6 +555,10 @@ EXPORT_SYMBOL(get_unused_fd_flags);
> static void __put_unused_fd(struct files_struct *files, unsigned int fd)
> {
> struct fdtable *fdt = files_fdtable(files);
> +#ifdef CONFIG_CGROUP_FILES
> + if (test_bit(fd, fdt->open_fds))
> + files_cgroup_unalloc_fd(files, 1);
> +#endif
> __clear_open_fd(fd, fdt);
> if (fd < files->next_fd)
> files->next_fd = fd;
> @@ -780,6 +817,15 @@ static int do_dup2(struct files_struct *files,
> tofree = fdt->fd[fd];
> if (!tofree && fd_is_open(fd, fdt))
> goto Ebusy;
> +
> +#ifdef CONFIG_CGROUP_FILES
> + if (!tofree)
> + if (files_cgroup_alloc_fd(files, 1)) {
> + spin_unlock(&files->file_lock);
> + return -EMFILE;
> + }
> +#endif
> +
> get_file(file);
> rcu_assign_pointer(fdt->fd[fd], file);
> __set_open_fd(fd, fdt);
> diff --git a/fs/filescontrol.c b/fs/filescontrol.c
> new file mode 100644
> index 0000000..0ba8ffa
> --- /dev/null
> +++ b/fs/filescontrol.c
> @@ -0,0 +1,249 @@
> +/* filescontrol.c - Cgroup controller for open file handles.
> + *
> + * Copyright 2014 Google Inc.
> + * Author: Brian Makin <merimus at google.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/res_counter.h>
> +#include <linux/filescontrol.h>
> +#include <linux/cgroup.h>
> +#include <linux/export.h>
> +#include <linux/printk.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/fdtable.h>
> +
> +struct cgroup_subsys files_subsys __read_mostly;
> +EXPORT_SYMBOL(files_subsys);
> +
> +struct files_cgroup {
> + struct cgroup_subsys_state css;
> + struct res_counter open_handles;
> +};
> +
> +static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css)
> +{
> + return css ? container_of(css, struct files_cgroup, css) : NULL;
> +}
> +
> +static inline struct res_counter *
> +css_res_open_handles(struct cgroup_subsys_state *css)
> +{
> + return &css_fcg(css)->open_handles;
> +}
> +
> +static inline struct files_cgroup *
> +files_cgroup_from_files(struct files_struct *files)
> +{
> + return files->files_cgroup;
> +}
> +
> +static struct cgroup_subsys_state *
> +files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
> +{
> + struct files_cgroup *fcg;
> +
> + fcg = kzalloc(sizeof(*fcg), GFP_KERNEL);
> + if (!fcg)
> + goto out;
> +
> + if (!parent_css) {
> + res_counter_init(&fcg->open_handles, NULL);
> + res_counter_set_limit(&fcg->open_handles, get_max_files());
> + } else {
> + struct files_cgroup *parent_fcg = css_fcg(parent_css);
> + res_counter_init(&fcg->open_handles, &parent_fcg->open_handles);
> + res_counter_set_limit(&fcg->open_handles,
> + res_counter_read_u64(&parent_fcg->open_handles,
> + RES_LIMIT));
> + }
> + return &fcg->css;
> +
> +out:
> + return ERR_PTR(-ENOMEM);
> +}
> +
> +static void files_cgroup_css_free(struct cgroup_subsys_state *css)
> +{
> + kfree(css_fcg(css));
> +}
> +
> +u64 files_cgroup_count_fds(struct files_struct *files)
> +{
> + int i;
> + struct fdtable *fdt;
> + int retval = 0;
> +
> + fdt = files_fdtable(files);
> + for (i = 0; i < DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG); i++)
> + retval += hweight64((__u64)fdt->open_fds[i]);
> + return retval;
> +}
> +
> +static u64 files_in_taskset(struct cgroup_taskset *tset)
> +{
> + struct cgroup_subsys_state *css = NULL;
> + struct task_struct *task;
> + u64 files = 0;
> + cgroup_taskset_for_each(task, css, tset) {
> + if (!thread_group_leader(task))
> + continue;
> +
> + task_lock(task);
> + files += files_cgroup_count_fds(task->files);
> + task_unlock(task);
> + }
> + return files;
> +}
> +
> +/*
> + * If attaching this cgroup would overcommit the resource then deny
> + * the attach.
> + */
> +static int files_cgroup_can_attach(struct cgroup_subsys_state *css,
> + struct cgroup_taskset *tset)
> +{
> + u64 files = files_in_taskset(tset);
> + if (res_counter_margin(css_res_open_handles(css)) < files)
> + return -ENOMEM;
> + return 0;
> +}
> +
> +/*
> + * If resource counts have gone up between can_attach and attach then
> + * this may overcommit resources. In that case just deny further allocation
> + * until the resource usage drops.
> + */
> +static void files_cgroup_attach(struct cgroup_subsys_state *to_css,
> + struct cgroup_taskset *tset)
> +{
> + u64 num_files;
> + struct task_struct *task = cgroup_taskset_first(tset);
> + struct cgroup_subsys_state *from_css;
> + struct res_counter *from_res;
> + struct res_counter *to_res = css_res_open_handles(to_css);
> + struct res_counter *fail_res;
> + struct files_struct *files;
> +
> + task_lock(task);
> + files = task->files;
> + if (!files) {
> + task_unlock(task);
> + return;
> + }
> +
> + from_css = &files_cgroup_from_files(files)->css;
> + from_res = css_res_open_handles(from_css);
> +
> + spin_lock(&files->file_lock);
> + num_files = files_cgroup_count_fds(files);
> + res_counter_uncharge(from_res, num_files);
> + css_put(from_css);
> +
> + if (res_counter_charge(to_res, num_files, &fail_res))
> + pr_err("Open files limit overcommited\n");
> + css_get(to_css);
> +
> + task->files->files_cgroup = css_fcg(to_css);
> + spin_unlock(&files->file_lock);
> + task_unlock(task);
> +}
> +
> +int files_cgroup_alloc_fd(struct files_struct *files, u64 n)
> +{
> + struct res_counter *fail_res;
> + struct files_cgroup *files_cgroup = files_cgroup_from_files(files);
> +
> + if (res_counter_charge(&files_cgroup->open_handles, n, &fail_res))
> + return -ENOMEM;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(files_cgroup_alloc_fd);
> +
> +void files_cgroup_unalloc_fd(struct files_struct *files, u64 n)
> +{
> + struct files_cgroup *files_cgroup = files_cgroup_from_files(files);
> +
> + res_counter_uncharge(&files_cgroup->open_handles, n);
> +}
> +EXPORT_SYMBOL(files_cgroup_unalloc_fd);
> +
> +static u64 files_limit_read(struct cgroup_subsys_state *css,
> + struct cftype *cft)
> +{
> + struct files_cgroup *fcg = css_fcg(css);
> + return res_counter_read_u64(&fcg->open_handles, RES_LIMIT);
> +}
> +
> +static int files_limit_write(struct cgroup_subsys_state *css,
> + struct cftype *cft, u64 value)
> +{
> + struct files_cgroup *fcg = css_fcg(css);
> + return res_counter_set_limit(&fcg->open_handles, value);
> +}
> +
> +static u64 files_usage_read(struct cgroup_subsys_state *css,
> + struct cftype *cft)
> +{
> + struct files_cgroup *fcg = css_fcg(css);
> + return res_counter_read_u64(&fcg->open_handles, RES_USAGE);
> +}
> +
> +static struct cftype files[] = {
> + {
> + .name = "limit",
> + .read_u64 = files_limit_read,
> + .write_u64 = files_limit_write,
> + },
> + {
> + .name = "usage",
> + .read_u64 = files_usage_read,
> + },
> + { }
> +};
> +
> +struct cgroup_subsys files_subsys = {
> + .name = "files",
> + .css_alloc = files_cgroup_css_alloc,
> + .css_free = files_cgroup_css_free,
> + .can_attach = files_cgroup_can_attach,
> + .attach = files_cgroup_attach,
> + .base_cftypes = files,
> + .subsys_id = files_subsys_id,
> +};
> +
> +void files_cgroup_assign(struct files_struct *files)
> +{
> + struct task_struct *tsk = current;
> + struct cgroup_subsys_state *css;
> +
> + task_lock(tsk);
> + css = task_css(tsk, files_subsys_id);
> + css_get(css);
> + files->files_cgroup = container_of(css, struct files_cgroup, css);
> + task_unlock(tsk);
> +}
> +
> +void files_cgroup_remove(struct files_struct *files)
> +{
> + struct task_struct *tsk = current;
> + struct files_cgroup *fcg;
> +
> + task_lock(tsk);
> + spin_lock(&files->file_lock);
> + fcg = files_cgroup_from_files(files);
> + css_put(&fcg->css);
> + spin_unlock(&files->file_lock);
> + task_unlock(tsk);
> +}
> diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
> index 7b99d71..defadc0 100644
> --- a/include/linux/cgroup_subsys.h
> +++ b/include/linux/cgroup_subsys.h
> @@ -50,6 +50,11 @@ SUBSYS(net_prio)
> #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
> SUBSYS(hugetlb)
> #endif
> +
> +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FILES)
> +SUBSYS(files)
> +#endif
> +
> /*
> * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
> */
> diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
> index 70e8e21..fee8e45 100644
> --- a/include/linux/fdtable.h
> +++ b/include/linux/fdtable.h
> @@ -57,6 +57,9 @@ struct files_struct {
> unsigned long close_on_exec_init[1];
> unsigned long open_fds_init[1];
> struct file __rcu * fd_array[NR_OPEN_DEFAULT];
> +#ifdef CONFIG_CGROUP_FILES
> + struct files_cgroup *files_cgroup;
> +#endif
> };
>
> struct file_operations;
> diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h
> new file mode 100644
> index 0000000..e39ed2a
> --- /dev/null
> +++ b/include/linux/filescontrol.h
> @@ -0,0 +1,32 @@
> +/* filescontrol.h - Files Controller
> + *
> + * Copyright 2014 Google Inc.
> + * Author: Brian Makin <merimus at google.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef _LINUX_FILESCONTROL_H
> +#define _LINUX_FILESCONTROL_H
> +
> +#include <linux/fdtable.h>
> +
> +#ifdef CONFIG_CGROUP_FILES
> +
> +extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n);
> +extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n);
> +extern u64 files_cgroup_count_fds(struct files_struct *files);
> +
> +void files_cgroup_assign(struct files_struct *files);
> +void files_cgroup_remove(struct files_struct *files);
> +
> +#endif /* CONFIG_CGROUP_FILES */
> +#endif /* _LINUX_FILESCONTROL_H */
> diff --git a/init/Kconfig b/init/Kconfig
> index a4b4209..a8c8392 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1075,6 +1075,13 @@ config DEBUG_BLK_CGROUP
> Enable some debugging help. Currently it exports additional stat
> files in a cgroup which can be useful for debugging.
>
> +config CGROUP_FILES
> + bool "Files Resource Controller for Control Groups"
> + default n
> + help
> + Provides a cgroup resource controller that limits number of open
> + file handles within a cgroup.
> +
> endif # CGROUPS
>
> config CHECKPOINT_RESTORE
>
More information about the linux-yocto
mailing list