
sched_ext: Fixes for v6.12-rc1

- Three build fixes.
 
 - The fix for a stall bug introduced by a recent optimization in sched core
   (SM_IDLE).
 
 - Addition of /sys/kernel/sched_ext/enable_seq. While not a fix, it is a
   simple addition that distro people want so they can tell whether an SCX
   scheduler has ever been loaded on the system.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZvGekA4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGdkDAP46Wbz7XOTIJHs4NV3sxAH1Kk3bmZHtzB0C0zb6
 FChT3QEAzHFtY+mCtc/qJ6IMKizTDcgQ6V8zbCtXNuVxXxXMrAY=
 =uVPP
 -----END PGP SIGNATURE-----

Merge tag 'sched_ext-for-6.12-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Three build fixes

 - The fix for a stall bug introduced by a recent optimization in sched
   core (SM_IDLE)

 - Addition of /sys/kernel/sched_ext/enable_seq. While not a fix, it is
   a simple addition that distro people want so they can tell whether an
   SCX scheduler has ever been loaded on the system (an illustrative
   usage sketch follows this list)
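
Purely as an illustration (not part of this pull request), the sketch below
shows how a distro health check might consume the new counter; the sysfs path
is the one added by this series, while the script itself and the
scx_ever_loaded() helper are hypothetical:

    #!/usr/bin/env python3
    # Hypothetical sketch: report whether any sched_ext (SCX) scheduler has
    # been enabled since boot by reading the enable_seq counter added here.
    from pathlib import Path

    ENABLE_SEQ = Path("/sys/kernel/sched_ext/enable_seq")

    def scx_ever_loaded() -> bool:
        # A missing file means the kernel was built without sched_ext support.
        if not ENABLE_SEQ.exists():
            return False
        # enable_seq is bumped every time a scheduler is enabled, so any
        # non-zero value means an SCX scheduler has been loaded at least once.
        return int(ENABLE_SEQ.read_text()) > 0

    if __name__ == "__main__":
        print("SCX scheduler loaded since boot:", scx_ever_loaded())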

* tag 'sched_ext-for-6.12-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Provide a sysfs enable_seq counter
  sched_ext: Fix build when !CONFIG_STACKTRACE
  sched, sched_ext: Disable SM_IDLE/rq empty path when scx_enabled()
  sched: Put task_group::idle under CONFIG_GROUP_SCHED_WEIGHT
  sched: Add dummy version of sched_group_set_idle()
commit 6fa6588e59
Author: Linus Torvalds
Date:   2024-09-24 11:33:50 -07:00

5 changed files with 40 additions and 8 deletions

Documentation/scheduler/sched-ext.rst

@@ -83,6 +83,15 @@ The current status of the BPF scheduler can be determined as follows:
     # cat /sys/kernel/sched_ext/root/ops
     simple
 
+You can check if any BPF scheduler has ever been loaded since boot by examining
+this monotonically incrementing counter (a value of zero indicates that no BPF
+scheduler has been loaded):
+
+.. code-block:: none
+
+    # cat /sys/kernel/sched_ext/enable_seq
+    1
+
 ``tools/sched_ext/scx_show_state.py`` is a drgn script which shows more
 detailed information:
@@ -96,6 +105,7 @@ detailed information:
     enable_state : enabled (2)
     bypass_depth : 0
     nr_rejected : 0
+    enable_seq : 1
 
 If ``CONFIG_SCHED_DEBUG`` is set, whether a given task is on sched_ext can
 be determined as follows:

kernel/sched/core.c

@@ -6591,7 +6591,8 @@ static void __sched notrace __schedule(int sched_mode)
 	 */
 	prev_state = READ_ONCE(prev->__state);
 	if (sched_mode == SM_IDLE) {
-		if (!rq->nr_running) {
+		/* SCX must consult the BPF scheduler to tell if rq is empty */
+		if (!rq->nr_running && !scx_enabled()) {
 			next = prev;
 			goto picked;
 		}

kernel/sched/ext.c

@@ -874,6 +874,13 @@ static struct scx_exit_info *scx_exit_info;
 static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
 static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
 
+/*
+ * A monotonically increasing sequence number that is incremented every time a
+ * scheduler is enabled. This can be used to check if any custom sched_ext
+ * scheduler has ever been used in the system.
+ */
+static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
+
 /*
  * The maximum amount of time in jiffies that a task may be runnable without
  * being scheduled on a CPU. If this timeout is exceeded, it will trigger
@@ -4154,11 +4161,19 @@ static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
 }
 SCX_ATTR(hotplug_seq);
 
+static ssize_t scx_attr_enable_seq_show(struct kobject *kobj,
+					struct kobj_attribute *ka, char *buf)
+{
+	return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_enable_seq));
+}
+SCX_ATTR(enable_seq);
+
 static struct attribute *scx_global_attrs[] = {
 	&scx_attr_state.attr,
 	&scx_attr_switch_all.attr,
 	&scx_attr_nr_rejected.attr,
 	&scx_attr_hotplug_seq.attr,
+	&scx_attr_enable_seq.attr,
 	NULL,
 };
 
@@ -4469,8 +4484,9 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
 
 		if (ei->msg[0] != '\0')
			pr_err("sched_ext: %s: %s\n", scx_ops.name, ei->msg);
-
+#ifdef CONFIG_STACKTRACE
 		stack_trace_print(ei->bt, ei->bt_len, 2);
+#endif
 	} else {
 		pr_info("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
 			scx_ops.name, ei->reason);
@@ -4847,10 +4863,10 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
 		return;
 
 	ei->exit_code = exit_code;
-
+#ifdef CONFIG_STACKTRACE
 	if (kind >= SCX_EXIT_ERROR)
 		ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);
-
+#endif
 	va_start(args, fmt);
 	vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
 	va_end(args);
@@ -5176,6 +5192,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	kobject_uevent(scx_root_kobj, KOBJ_ADD);
 	mutex_unlock(&scx_ops_enable_mutex);
 
+	atomic_long_inc(&scx_enable_seq);
+
 	return 0;
 
 err_del:

kernel/sched/sched.h

@@ -432,16 +432,17 @@ struct cfs_bandwidth {
 struct task_group {
 	struct cgroup_subsys_state css;
 
+#ifdef CONFIG_GROUP_SCHED_WEIGHT
+	/* A positive value indicates that this is a SCHED_IDLE group. */
+	int			idle;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each CPU */
 	struct sched_entity	**se;
 	/* runqueue "owned" by this group on each CPU */
 	struct cfs_rq		**cfs_rq;
 	unsigned long		shares;
-
-	/* A positive value indicates that this is a SCHED_IDLE group. */
-	int			idle;
-
 #ifdef CONFIG_SMP
 	/*
 	 * load_avg can be heavily contended at clock tick time, so put
@@ -582,6 +583,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
 #endif /* CONFIG_SMP */
 #else /* !CONFIG_FAIR_GROUP_SCHED */
 static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares) { return 0; }
+static inline int sched_group_set_idle(struct task_group *tg, long idle) { return 0; }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #else /* CONFIG_CGROUP_SCHED */

tools/sched_ext/scx_show_state.py

@@ -37,3 +37,4 @@ print(f'switched_all : {read_static_key("__scx_switched_all")}')
 print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
 print(f'bypass_depth : {read_atomic("scx_ops_bypass_depth")}')
 print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
+print(f'enable_seq : {read_atomic("scx_enable_seq")}')