[linux-yocto] [PATCH] [RFC] cgroups: Resource controller for open files

He Zhe zhe.he at windriver.com
Wed Jul 30 02:07:32 PDT 2014


On 07/30/2014 11:06 AM, Bruce Ashfield wrote:
> On 2014-07-29, 1:58 AM, zhe.he at windriver.com wrote:
>> From: He Zhe <zhe.he at windriver.com>
>
> How did you extract this patch from the mailing list? The
> From: field should not be changing, since you aren't the
> original author of the patch.
>

I've corrected this.

>>
>> Add a resource controller for limiting the number of open
>> file handles.  This allows us to catch misbehaving processes
>> and return EMFILE instead of ENOMEM for kernel memory limits.
>>
>> Signed-off-by: Binder Makin <merimus at google.com>
>>
>> Port from lkml: https://lkml.org/lkml/2014/7/2/640
>> Correct wrong handling in fs/file.c:do_dup2
>
> This should be in a separate patch.
>
> So let's see a new send of this, with patch 1/2 being the upstream
> change (with the author information intact) and the 2/2 being any
> fixes you did to the original.
>

I added the following line and my SOB to patch 1/2. Is that appropriate?
lkml https://lkml.org/lkml/2014/7/2/640 upstream

Please check out my latest submission.

Zhe

> Bruce
>
>>
>> Signed-off-by: He Zhe <zhe.he at windriver.com>
>> ---
>>   fs/Makefile                   |    1 +
>>   fs/file.c                     |   46 ++++++++
>>   fs/filescontrol.c             |  249 
>> +++++++++++++++++++++++++++++++++++++++++
>>   include/linux/cgroup_subsys.h |    5 +
>>   include/linux/fdtable.h       |    3 +
>>   include/linux/filescontrol.h  |   32 ++++++
>>   init/Kconfig                  |    7 ++
>>   7 files changed, 343 insertions(+)
>>   create mode 100644 fs/filescontrol.c
>>   create mode 100644 include/linux/filescontrol.h
>>
>> diff --git a/fs/Makefile b/fs/Makefile
>> index ebfe2ee..18eaee0 100644
>> --- a/fs/Makefile
>> +++ b/fs/Makefile
>> @@ -48,6 +48,7 @@ obj-$(CONFIG_COREDUMP)        += coredump.o
>>   obj-$(CONFIG_SYSCTL)        += drop_caches.o
>>
>>   obj-$(CONFIG_FHANDLE)        += fhandle.o
>> +obj-$(CONFIG_CGROUP_FILES)    += filescontrol.o
>>
>>   obj-y                += quota/
>>
>> diff --git a/fs/file.c b/fs/file.c
>> index eb56a13..e615dc9 100644
>> --- a/fs/file.c
>> +++ b/fs/file.c
>> @@ -22,6 +22,7 @@
>>   #include <linux/spinlock.h>
>>   #include <linux/rcupdate.h>
>>   #include <linux/workqueue.h>
>> +#include <linux/filescontrol.h>
>>
>>   int sysctl_nr_open __read_mostly = 1024*1024;
>>   int sysctl_nr_open_min = BITS_PER_LONG;
>> @@ -264,6 +265,9 @@ struct files_struct *dup_fd(struct files_struct 
>> *oldf, int *errorp)
>>       new_fdt->close_on_exec = newf->close_on_exec_init;
>>       new_fdt->open_fds = newf->open_fds_init;
>>       new_fdt->fd = &newf->fd_array[0];
>> +#ifdef CONFIG_CGROUP_FILES
>> +    files_cgroup_assign(newf);
>> +#endif
>>
>>       spin_lock(&oldf->file_lock);
>>       old_fdt = files_fdtable(oldf);
>> @@ -340,9 +344,28 @@ struct files_struct *dup_fd(struct files_struct 
>> *oldf, int *errorp)
>>
>>       rcu_assign_pointer(newf->fdt, new_fdt);
>>
>> +#ifdef CONFIG_CGROUP_FILES
>> +    if (!files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)))
>> +        return newf;
>> +
>> +/* could not get enough FD resources.  Need to clean up. */
>> +    new_fds = new_fdt->fd;
>> +    for (i = open_files; i != 0; i--) {
>> +        struct file *f = *new_fds++;
>> +        if (f)
>> +            fput(f);
>> +    }
>> +    if (new_fdt != &newf->fdtab)
>> +        __free_fdtable(new_fdt);
>> +    *errorp = -EMFILE;
>> +#else
>>       return newf;
>> +#endif
>>
>>   out_release:
>> +#ifdef CONFIG_CGROUP_FILES
>> +    files_cgroup_remove(newf);
>> +#endif
>>       kmem_cache_free(files_cachep, newf);
>>   out:
>>       return NULL;
>> @@ -368,6 +391,9 @@ static struct fdtable *close_files(struct 
>> files_struct * files)
>>               if (set & 1) {
>>                   struct file * file = xchg(&fdt->fd[i], NULL);
>>                   if (file) {
>> +#ifdef CONFIG_CGROUP_FILES
>> +                    files_cgroup_unalloc_fd(files, 1);
>> +#endif
>>                       filp_close(file, files);
>>                       cond_resched();
>>                   }
>> @@ -486,6 +512,13 @@ repeat:
>>       if (error)
>>           goto repeat;
>>
>> +#ifdef CONFIG_CGROUP_FILES
>> +    if (files_cgroup_alloc_fd(files, 1)) {
>> +        error = -EMFILE;
>> +        goto out;
>> +    }
>> +#endif
>> +
>>       if (start <= files->next_fd)
>>           files->next_fd = fd + 1;
>>
>> @@ -522,6 +555,10 @@ EXPORT_SYMBOL(get_unused_fd_flags);
>>   static void __put_unused_fd(struct files_struct *files, unsigned 
>> int fd)
>>   {
>>       struct fdtable *fdt = files_fdtable(files);
>> +#ifdef CONFIG_CGROUP_FILES
>> +    if (test_bit(fd, fdt->open_fds))
>> +        files_cgroup_unalloc_fd(files, 1);
>> +#endif
>>       __clear_open_fd(fd, fdt);
>>       if (fd < files->next_fd)
>>           files->next_fd = fd;
>> @@ -780,6 +817,15 @@ static int do_dup2(struct files_struct *files,
>>       tofree = fdt->fd[fd];
>>       if (!tofree && fd_is_open(fd, fdt))
>>           goto Ebusy;
>> +
>> +#ifdef CONFIG_CGROUP_FILES
>> +    if (!tofree)
>> +        if (files_cgroup_alloc_fd(files, 1)) {
>> +            spin_unlock(&files->file_lock);
>> +            return -EMFILE;
>> +        }
>> +#endif
>> +
>>       get_file(file);
>>       rcu_assign_pointer(fdt->fd[fd], file);
>>       __set_open_fd(fd, fdt);
>> diff --git a/fs/filescontrol.c b/fs/filescontrol.c
>> new file mode 100644
>> index 0000000..0ba8ffa
>> --- /dev/null
>> +++ b/fs/filescontrol.c
>> @@ -0,0 +1,249 @@
>> +/* filescontrol.c - Cgroup controller for open file handles.
>> + *
>> + * Copyright 2014 Google Inc.
>> + * Author: Brian Makin <merimus at google.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include <linux/res_counter.h>
>> +#include <linux/filescontrol.h>
>> +#include <linux/cgroup.h>
>> +#include <linux/export.h>
>> +#include <linux/printk.h>
>> +#include <linux/slab.h>
>> +#include <linux/fs.h>
>> +#include <linux/fdtable.h>
>> +
>> +struct cgroup_subsys files_subsys __read_mostly;
>> +EXPORT_SYMBOL(files_subsys);
>> +
>> +struct files_cgroup {
>> +    struct cgroup_subsys_state css;
>> +    struct res_counter open_handles;
>> +};
>> +
>> +static inline struct files_cgroup *css_fcg(struct 
>> cgroup_subsys_state *css)
>> +{
>> +    return css ? container_of(css, struct files_cgroup, css) : NULL;
>> +}
>> +
>> +static inline struct res_counter *
>> +css_res_open_handles(struct cgroup_subsys_state *css)
>> +{
>> +    return &css_fcg(css)->open_handles;
>> +}
>> +
>> +static inline struct files_cgroup *
>> +files_cgroup_from_files(struct files_struct *files)
>> +{
>> +    return files->files_cgroup;
>> +}
>> +
>> +static struct cgroup_subsys_state *
>> +files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>> +{
>> +    struct files_cgroup *fcg;
>> +
>> +    fcg = kzalloc(sizeof(*fcg), GFP_KERNEL);
>> +    if (!fcg)
>> +        goto out;
>> +
>> +    if (!parent_css) {
>> +        res_counter_init(&fcg->open_handles, NULL);
>> +        res_counter_set_limit(&fcg->open_handles, get_max_files());
>> +    } else {
>> +        struct files_cgroup *parent_fcg = css_fcg(parent_css);
>> +        res_counter_init(&fcg->open_handles, 
>> &parent_fcg->open_handles);
>> +        res_counter_set_limit(&fcg->open_handles,
>> + res_counter_read_u64(&parent_fcg->open_handles,
>> +                        RES_LIMIT));
>> +    }
>> +    return &fcg->css;
>> +
>> +out:
>> +    return ERR_PTR(-ENOMEM);
>> +}
>> +
>> +static void files_cgroup_css_free(struct cgroup_subsys_state *css)
>> +{
>> +    kfree(css_fcg(css));
>> +}
>> +
>> +u64 files_cgroup_count_fds(struct files_struct *files)
>> +{
>> +    int i;
>> +    struct fdtable *fdt;
>> +    int retval = 0;
>> +
>> +    fdt = files_fdtable(files);
>> +    for (i = 0; i < DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG); i++)
>> +        retval += hweight64((__u64)fdt->open_fds[i]);
>> +    return retval;
>> +}
>> +
>> +static u64 files_in_taskset(struct cgroup_taskset *tset)
>> +{
>> +    struct cgroup_subsys_state *css = NULL;
>> +    struct task_struct *task;
>> +    u64 files = 0;
>> +    cgroup_taskset_for_each(task, css, tset) {
>> +        if (!thread_group_leader(task))
>> +            continue;
>> +
>> +        task_lock(task);
>> +        files += files_cgroup_count_fds(task->files);
>> +        task_unlock(task);
>> +    }
>> +    return files;
>> +}
>> +
>> +/*
>> + * If attaching this cgroup would overcommit the resource then deny
>> + * the attach.
>> + */
>> +static int files_cgroup_can_attach(struct cgroup_subsys_state *css,
>> +                struct cgroup_taskset *tset)
>> +{
>> +    u64 files = files_in_taskset(tset);
>> +    if (res_counter_margin(css_res_open_handles(css)) < files)
>> +        return -ENOMEM;
>> +    return 0;
>> +}
>> +
>> +/*
>> + * If resource counts have gone up between can_attach and attach then
>> + * this may overcommit resources.  In that case just deny further 
>> allocation
>> + * until the resource usage drops.
>> + */
>> +static void files_cgroup_attach(struct cgroup_subsys_state *to_css,
>> +                struct cgroup_taskset *tset)
>> +{
>> +    u64 num_files;
>> +    struct task_struct *task = cgroup_taskset_first(tset);
>> +    struct cgroup_subsys_state *from_css;
>> +    struct res_counter *from_res;
>> +    struct res_counter *to_res = css_res_open_handles(to_css);
>> +    struct res_counter *fail_res;
>> +    struct files_struct *files;
>> +
>> +    task_lock(task);
>> +    files = task->files;
>> +    if (!files) {
>> +        task_unlock(task);
>> +        return;
>> +    }
>> +
>> +    from_css = &files_cgroup_from_files(files)->css;
>> +    from_res = css_res_open_handles(from_css);
>> +
>> +    spin_lock(&files->file_lock);
>> +    num_files = files_cgroup_count_fds(files);
>> +    res_counter_uncharge(from_res, num_files);
>> +    css_put(from_css);
>> +
>> +    if (res_counter_charge(to_res, num_files, &fail_res))
>> +        pr_err("Open files limit overcommited\n");
>> +    css_get(to_css);
>> +
>> +    task->files->files_cgroup = css_fcg(to_css);
>> +    spin_unlock(&files->file_lock);
>> +    task_unlock(task);
>> +}
>> +
>> +int files_cgroup_alloc_fd(struct files_struct *files, u64 n)
>> +{
>> +    struct res_counter *fail_res;
>> +    struct files_cgroup *files_cgroup = files_cgroup_from_files(files);
>> +
>> +    if (res_counter_charge(&files_cgroup->open_handles, n, &fail_res))
>> +        return -ENOMEM;
>> +
>> +    return 0;
>> +}
>> +EXPORT_SYMBOL(files_cgroup_alloc_fd);
>> +
>> +void files_cgroup_unalloc_fd(struct files_struct *files, u64 n)
>> +{
>> +    struct files_cgroup *files_cgroup = files_cgroup_from_files(files);
>> +
>> +    res_counter_uncharge(&files_cgroup->open_handles, n);
>> +}
>> +EXPORT_SYMBOL(files_cgroup_unalloc_fd);
>> +
>> +static u64 files_limit_read(struct cgroup_subsys_state *css,
>> +            struct cftype *cft)
>> +{
>> +    struct files_cgroup *fcg = css_fcg(css);
>> +    return res_counter_read_u64(&fcg->open_handles, RES_LIMIT);
>> +}
>> +
>> +static int files_limit_write(struct cgroup_subsys_state *css,
>> +            struct cftype *cft, u64 value)
>> +{
>> +    struct files_cgroup *fcg = css_fcg(css);
>> +    return res_counter_set_limit(&fcg->open_handles, value);
>> +}
>> +
>> +static u64 files_usage_read(struct cgroup_subsys_state *css,
>> +            struct cftype *cft)
>> +{
>> +    struct files_cgroup *fcg = css_fcg(css);
>> +    return res_counter_read_u64(&fcg->open_handles, RES_USAGE);
>> +}
>> +
>> +static struct cftype files[] = {
>> +    {
>> +        .name = "limit",
>> +        .read_u64 = files_limit_read,
>> +        .write_u64 = files_limit_write,
>> +    },
>> +    {
>> +        .name = "usage",
>> +        .read_u64 = files_usage_read,
>> +    },
>> +    { }
>> +};
>> +
>> +struct cgroup_subsys files_subsys = {
>> +    .name = "files",
>> +    .css_alloc = files_cgroup_css_alloc,
>> +    .css_free = files_cgroup_css_free,
>> +    .can_attach = files_cgroup_can_attach,
>> +    .attach = files_cgroup_attach,
>> +    .base_cftypes = files,
>> +    .subsys_id = files_subsys_id,
>> +};
>> +
>> +void files_cgroup_assign(struct files_struct *files)
>> +{
>> +    struct task_struct *tsk = current;
>> +    struct cgroup_subsys_state *css;
>> +
>> +    task_lock(tsk);
>> +    css = task_css(tsk, files_subsys_id);
>> +    css_get(css);
>> +    files->files_cgroup = container_of(css, struct files_cgroup, css);
>> +    task_unlock(tsk);
>> +}
>> +
>> +void files_cgroup_remove(struct files_struct *files)
>> +{
>> +    struct task_struct *tsk = current;
>> +    struct files_cgroup *fcg;
>> +
>> +    task_lock(tsk);
>> +    spin_lock(&files->file_lock);
>> +    fcg = files_cgroup_from_files(files);
>> +    css_put(&fcg->css);
>> +    spin_unlock(&files->file_lock);
>> +    task_unlock(tsk);
>> +}
>> diff --git a/include/linux/cgroup_subsys.h 
>> b/include/linux/cgroup_subsys.h
>> index 7b99d71..defadc0 100644
>> --- a/include/linux/cgroup_subsys.h
>> +++ b/include/linux/cgroup_subsys.h
>> @@ -50,6 +50,11 @@ SUBSYS(net_prio)
>>   #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
>>   SUBSYS(hugetlb)
>>   #endif
>> +
>> +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FILES)
>> +SUBSYS(files)
>> +#endif
>> +
>>   /*
>>    * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP 
>> MAINTAINERS.
>>    */
>> diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
>> index 70e8e21..fee8e45 100644
>> --- a/include/linux/fdtable.h
>> +++ b/include/linux/fdtable.h
>> @@ -57,6 +57,9 @@ struct files_struct {
>>       unsigned long close_on_exec_init[1];
>>       unsigned long open_fds_init[1];
>>       struct file __rcu * fd_array[NR_OPEN_DEFAULT];
>> +#ifdef CONFIG_CGROUP_FILES
>> +    struct files_cgroup *files_cgroup;
>> +#endif
>>   };
>>
>>   struct file_operations;
>> diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h
>> new file mode 100644
>> index 0000000..e39ed2a
>> --- /dev/null
>> +++ b/include/linux/filescontrol.h
>> @@ -0,0 +1,32 @@
>> +/* filescontrol.h - Files Controller
>> + *
>> + * Copyright 2014 Google Inc.
>> + * Author: Brian Makin <merimus at google.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#ifndef _LINUX_FILESCONTROL_H
>> +#define _LINUX_FILESCONTROL_H
>> +
>> +#include <linux/fdtable.h>
>> +
>> +#ifdef CONFIG_CGROUP_FILES
>> +
>> +extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n);
>> +extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n);
>> +extern u64 files_cgroup_count_fds(struct files_struct *files);
>> +
>> +void files_cgroup_assign(struct files_struct *files);
>> +void files_cgroup_remove(struct files_struct *files);
>> +
>> +#endif /* CONFIG_CGROUP_FILES */
>> +#endif /* _LINUX_FILESCONTROL_H */
>> diff --git a/init/Kconfig b/init/Kconfig
>> index a4b4209..a8c8392 100644
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1075,6 +1075,13 @@ config DEBUG_BLK_CGROUP
>>       Enable some debugging help. Currently it exports additional stat
>>       files in a cgroup which can be useful for debugging.
>>
>> +config CGROUP_FILES
>> +    bool "Files Resource Controller for Control Groups"
>> +    default n
>> +    help
>> +      Provides a cgroup resource controller that limits number of open
>> +      file handles within a cgroup.
>> +
>>   endif # CGROUPS
>>
>>   config CHECKPOINT_RESTORE
>>
>



More information about the linux-yocto mailing list