cgroup/pids: Separate semantics of pids.events related to pids.max
Currently, when the pids.max limit is breached in the hierarchy, the event is counted and reported in the cgroup where the forking task resides. This decouples the limit from the notification caused by the limit, making it hard to detect when the actual limit took effect. Redefine pids.events:max as the number of times the limit of the cgroup was hit. (The implementation also distinguishes a "forkfail" event, but this is currently not exposed as it would fit better into pids.stat. It also differs from pids.events:max only when pids.max is configured on non-leaf cgroups.) Since this changes the semantics of the original "max" event, introduce the change only in the v2 API of the controller and add a cgroup2 mount option to revert to the legacy behavior. Signed-off-by: Michal Koutný <mkoutny@suse.com> Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
0ac380020c
commit
73e75e6fc3
@ -36,7 +36,8 @@ superset of parent/child/pids.current.
|
|||||||
|
|
||||||
The pids.events file contains event counters:
|
The pids.events file contains event counters:
|
||||||
|
|
||||||
- max: Number of times fork failed because limit was hit.
|
- max: Number of times fork failed in the cgroup because limit was hit in
|
||||||
|
self or ancestors.
|
||||||
|
|
||||||
Example
|
Example
|
||||||
-------
|
-------
|
||||||
|
@ -239,6 +239,10 @@ cgroup v2 currently supports the following mount options.
|
|||||||
will not be tracked by the memory controller (even if cgroup
|
will not be tracked by the memory controller (even if cgroup
|
||||||
v2 is remounted later on).
|
v2 is remounted later on).
|
||||||
|
|
||||||
|
pids_localevents
|
||||||
|
Represent fork failures inside cgroup's pids.events:max (v1 behavior),
|
||||||
|
not its limit being hit (v2 behavior).
|
||||||
|
|
||||||
|
|
||||||
Organizing Processes and Threads
|
Organizing Processes and Threads
|
||||||
--------------------------------
|
--------------------------------
|
||||||
@ -2205,12 +2209,13 @@ PID Interface Files
|
|||||||
descendants has ever reached.
|
descendants has ever reached.
|
||||||
|
|
||||||
pids.events
|
pids.events
|
||||||
A read-only flat-keyed file which exists on non-root cgroups. The
|
A read-only flat-keyed file which exists on non-root cgroups. Unless
|
||||||
following entries are defined. Unless specified otherwise, a value
|
specified otherwise, a value change in this file generates a file
|
||||||
change in this file generates a file modified event.
|
modified event. The following entries are defined.
|
||||||
|
|
||||||
max
|
max
|
||||||
Number of times fork failed because limit was hit.
|
The number of times the cgroup's number of processes hit the
|
||||||
|
limit (see also pids_localevents).
|
||||||
|
|
||||||
Organisational operations are not blocked by cgroup policies, so it is
|
Organisational operations are not blocked by cgroup policies, so it is
|
||||||
possible to have pids.current > pids.max. This can be done by either
|
possible to have pids.current > pids.max. This can be done by either
|
||||||
|
@ -119,7 +119,12 @@ enum {
|
|||||||
/*
|
/*
|
||||||
* Enable hugetlb accounting for the memory controller.
|
* Enable hugetlb accounting for the memory controller.
|
||||||
*/
|
*/
|
||||||
CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
|
CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enable legacy local pids.events.
|
||||||
|
*/
|
||||||
|
CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20),
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cftype->flags */
|
/* cftype->flags */
|
||||||
|
@ -1922,6 +1922,7 @@ enum cgroup2_param {
|
|||||||
Opt_memory_localevents,
|
Opt_memory_localevents,
|
||||||
Opt_memory_recursiveprot,
|
Opt_memory_recursiveprot,
|
||||||
Opt_memory_hugetlb_accounting,
|
Opt_memory_hugetlb_accounting,
|
||||||
|
Opt_pids_localevents,
|
||||||
nr__cgroup2_params
|
nr__cgroup2_params
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1931,6 +1932,7 @@ static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
|
|||||||
fsparam_flag("memory_localevents", Opt_memory_localevents),
|
fsparam_flag("memory_localevents", Opt_memory_localevents),
|
||||||
fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
|
fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
|
||||||
fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting),
|
fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting),
|
||||||
|
fsparam_flag("pids_localevents", Opt_pids_localevents),
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1960,6 +1962,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
|
|||||||
case Opt_memory_hugetlb_accounting:
|
case Opt_memory_hugetlb_accounting:
|
||||||
ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
|
ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
|
||||||
return 0;
|
return 0;
|
||||||
|
case Opt_pids_localevents:
|
||||||
|
ctx->flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
@ -1989,6 +1994,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
|
|||||||
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
|
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
|
||||||
else
|
else
|
||||||
cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
|
cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
|
||||||
|
|
||||||
|
if (root_flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
|
||||||
|
cgrp_dfl_root.flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS;
|
||||||
|
else
|
||||||
|
cgrp_dfl_root.flags &= ~CGRP_ROOT_PIDS_LOCAL_EVENTS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2004,6 +2014,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
|
|||||||
seq_puts(seq, ",memory_recursiveprot");
|
seq_puts(seq, ",memory_recursiveprot");
|
||||||
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING)
|
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING)
|
||||||
seq_puts(seq, ",memory_hugetlb_accounting");
|
seq_puts(seq, ",memory_hugetlb_accounting");
|
||||||
|
if (cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
|
||||||
|
seq_puts(seq, ",pids_localevents");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7062,7 +7074,8 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
|
|||||||
"favordynmods\n"
|
"favordynmods\n"
|
||||||
"memory_localevents\n"
|
"memory_localevents\n"
|
||||||
"memory_recursiveprot\n"
|
"memory_recursiveprot\n"
|
||||||
"memory_hugetlb_accounting\n");
|
"memory_hugetlb_accounting\n"
|
||||||
|
"pids_localevents\n");
|
||||||
}
|
}
|
||||||
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
|
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
|
||||||
|
|
||||||
|
@ -38,6 +38,14 @@
|
|||||||
#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
|
#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
|
||||||
#define PIDS_MAX_STR "max"
|
#define PIDS_MAX_STR "max"
|
||||||
|
|
||||||
|
enum pidcg_event {
|
||||||
|
/* Fork failed in subtree because this pids_cgroup limit was hit. */
|
||||||
|
PIDCG_MAX,
|
||||||
|
/* Fork failed in this pids_cgroup because ancestor limit was hit. */
|
||||||
|
PIDCG_FORKFAIL,
|
||||||
|
NR_PIDCG_EVENTS,
|
||||||
|
};
|
||||||
|
|
||||||
struct pids_cgroup {
|
struct pids_cgroup {
|
||||||
struct cgroup_subsys_state css;
|
struct cgroup_subsys_state css;
|
||||||
|
|
||||||
@ -52,8 +60,7 @@ struct pids_cgroup {
|
|||||||
/* Handle for "pids.events" */
|
/* Handle for "pids.events" */
|
||||||
struct cgroup_file events_file;
|
struct cgroup_file events_file;
|
||||||
|
|
||||||
/* Number of times fork failed because limit was hit. */
|
atomic64_t events[NR_PIDCG_EVENTS];
|
||||||
atomic64_t events_limit;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
|
static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
|
||||||
@ -148,12 +155,13 @@ static void pids_charge(struct pids_cgroup *pids, int num)
|
|||||||
* pids_try_charge - hierarchically try to charge the pid count
|
* pids_try_charge - hierarchically try to charge the pid count
|
||||||
* @pids: the pid cgroup state
|
* @pids: the pid cgroup state
|
||||||
* @num: the number of pids to charge
|
* @num: the number of pids to charge
|
||||||
|
* @fail: storage of pid cgroup causing the fail
|
||||||
*
|
*
|
||||||
* This function follows the set limit. It will fail if the charge would cause
|
* This function follows the set limit. It will fail if the charge would cause
|
||||||
* the new value to exceed the hierarchical limit. Returns 0 if the charge
|
* the new value to exceed the hierarchical limit. Returns 0 if the charge
|
||||||
* succeeded, otherwise -EAGAIN.
|
* succeeded, otherwise -EAGAIN.
|
||||||
*/
|
*/
|
||||||
static int pids_try_charge(struct pids_cgroup *pids, int num)
|
static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup **fail)
|
||||||
{
|
{
|
||||||
struct pids_cgroup *p, *q;
|
struct pids_cgroup *p, *q;
|
||||||
|
|
||||||
@ -166,9 +174,10 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
|
|||||||
* p->limit is %PIDS_MAX then we know that this test will never
|
* p->limit is %PIDS_MAX then we know that this test will never
|
||||||
* fail.
|
* fail.
|
||||||
*/
|
*/
|
||||||
if (new > limit)
|
if (new > limit) {
|
||||||
|
*fail = p;
|
||||||
goto revert;
|
goto revert;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Not technically accurate if we go over limit somewhere up
|
* Not technically accurate if we go over limit somewhere up
|
||||||
* the hierarchy, but that's tolerable for the watermark.
|
* the hierarchy, but that's tolerable for the watermark.
|
||||||
@ -236,7 +245,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset)
|
|||||||
static int pids_can_fork(struct task_struct *task, struct css_set *cset)
|
static int pids_can_fork(struct task_struct *task, struct css_set *cset)
|
||||||
{
|
{
|
||||||
struct cgroup_subsys_state *css;
|
struct cgroup_subsys_state *css;
|
||||||
struct pids_cgroup *pids;
|
struct pids_cgroup *pids, *pids_over_limit;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (cset)
|
if (cset)
|
||||||
@ -244,15 +253,23 @@ static int pids_can_fork(struct task_struct *task, struct css_set *cset)
|
|||||||
else
|
else
|
||||||
css = task_css_check(current, pids_cgrp_id, true);
|
css = task_css_check(current, pids_cgrp_id, true);
|
||||||
pids = css_pids(css);
|
pids = css_pids(css);
|
||||||
err = pids_try_charge(pids, 1);
|
err = pids_try_charge(pids, 1, &pids_over_limit);
|
||||||
if (err) {
|
if (err) {
|
||||||
/* Only log the first time events_limit is incremented. */
|
/* compatibility on v1 where events were notified in leaves. */
|
||||||
if (atomic64_inc_return(&pids->events_limit) == 1) {
|
if (!cgroup_subsys_on_dfl(pids_cgrp_subsys))
|
||||||
|
pids_over_limit = pids;
|
||||||
|
|
||||||
|
/* Only log the first time limit is hit. */
|
||||||
|
if (atomic64_inc_return(&pids->events[PIDCG_FORKFAIL]) == 1) {
|
||||||
pr_info("cgroup: fork rejected by pids controller in ");
|
pr_info("cgroup: fork rejected by pids controller in ");
|
||||||
pr_cont_cgroup_path(css->cgroup);
|
pr_cont_cgroup_path(pids->css.cgroup);
|
||||||
pr_cont("\n");
|
pr_cont("\n");
|
||||||
}
|
}
|
||||||
|
atomic64_inc(&pids_over_limit->events[PIDCG_MAX]);
|
||||||
|
|
||||||
cgroup_file_notify(&pids->events_file);
|
cgroup_file_notify(&pids->events_file);
|
||||||
|
if (pids_over_limit != pids)
|
||||||
|
cgroup_file_notify(&pids_over_limit->events_file);
|
||||||
}
|
}
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -340,8 +357,13 @@ static s64 pids_peak_read(struct cgroup_subsys_state *css,
|
|||||||
static int pids_events_show(struct seq_file *sf, void *v)
|
static int pids_events_show(struct seq_file *sf, void *v)
|
||||||
{
|
{
|
||||||
struct pids_cgroup *pids = css_pids(seq_css(sf));
|
struct pids_cgroup *pids = css_pids(seq_css(sf));
|
||||||
|
enum pidcg_event pe = PIDCG_MAX;
|
||||||
|
|
||||||
seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit));
|
if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) ||
|
||||||
|
cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
|
||||||
|
pe = PIDCG_FORKFAIL;
|
||||||
|
|
||||||
|
seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events[pe]));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user