
sched_ext: Fixes for v6.12-rc1

- Three build fixes.
 
 - The fix for a stall bug introduced by a recent optimization in sched core
   (SM_IDLE).
 
 - Addition of /sys/kernel/sched_ext/enable_seq. While not a fix, it is a
   simple addition that distro people want so they can tell whether an SCX
   scheduler has ever been loaded on the system.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZvGekA4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGdkDAP46Wbz7XOTIJHs4NV3sxAH1Kk3bmZHtzB0C0zb6
 FChT3QEAzHFtY+mCtc/qJ6IMKizTDcgQ6V8zbCtXNuVxXxXMrAY=
 =uVPP
 -----END PGP SIGNATURE-----

Merge tag 'sched_ext-for-6.12-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Three build fixes

 - The fix for a stall bug introduced by a recent optimization in sched
   core (SM_IDLE)

 - Addition of /sys/kernel/sched_ext/enable_seq. While not a fix, it is
   a simple addition that distro people want so they can tell whether an
   SCX scheduler has ever been loaded on the system (an illustrative
   usage sketch follows this list)
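
Purely as an illustration (not part of this pull request), the sketch below
shows how a distro health check might consume the new counter; the sysfs path
is the one added by this series, while the script itself and the
scx_ever_loaded() helper are hypothetical:

    #!/usr/bin/env python3
    # Hypothetical sketch: report whether any sched_ext (SCX) scheduler has
    # been enabled since boot by reading the enable_seq counter added here.
    from pathlib import Path

    ENABLE_SEQ = Path("/sys/kernel/sched_ext/enable_seq")

    def scx_ever_loaded() -> bool:
        # A missing file means the kernel was built without sched_ext support.
        if not ENABLE_SEQ.exists():
            return False
        # enable_seq is bumped every time a scheduler is enabled, so any
        # non-zero value means an SCX scheduler has been loaded at least once.
        return int(ENABLE_SEQ.read_text()) > 0

    if __name__ == "__main__":
        print("SCX scheduler loaded since boot:", scx_ever_loaded())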

* tag 'sched_ext-for-6.12-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Provide a sysfs enable_seq counter
  sched_ext: Fix build when !CONFIG_STACKTRACE
  sched, sched_ext: Disable SM_IDLE/rq empty path when scx_enabled()
  sched: Put task_group::idle under CONFIG_GROUP_SCHED_WEIGHT
  sched: Add dummy version of sched_group_set_idle()
commit 6fa6588e59
Author: Linus Torvalds
Date:   2024-09-24 11:33:50 -07:00

5 changed files with 40 additions and 8 deletions

Documentation/scheduler/sched-ext.rst

@@ -83,6 +83,15 @@ The current status of the BPF scheduler can be determined as follows:
     # cat /sys/kernel/sched_ext/root/ops
     simple
 
+You can check if any BPF scheduler has ever been loaded since boot by examining
+this monotonically incrementing counter (a value of zero indicates that no BPF
+scheduler has been loaded):
+
+.. code-block:: none
+
+    # cat /sys/kernel/sched_ext/enable_seq
+    1
+
 ``tools/sched_ext/scx_show_state.py`` is a drgn script which shows more
 detailed information:
@@ -96,6 +105,7 @@ detailed information:
     enable_state : enabled (2)
     bypass_depth : 0
     nr_rejected : 0
+    enable_seq : 1
 
 If ``CONFIG_SCHED_DEBUG`` is set, whether a given task is on sched_ext can
 be determined as follows:

kernel/sched/core.c

@@ -6591,7 +6591,8 @@ static void __sched notrace __schedule(int sched_mode)
 	 */
 	prev_state = READ_ONCE(prev->__state);
 	if (sched_mode == SM_IDLE) {
-		if (!rq->nr_running) {
+		/* SCX must consult the BPF scheduler to tell if rq is empty */
+		if (!rq->nr_running && !scx_enabled()) {
 			next = prev;
 			goto picked;
 		}

kernel/sched/ext.c

@@ -874,6 +874,13 @@ static struct scx_exit_info *scx_exit_info;
 static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
 static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
 
+/*
+ * A monotonically increasing sequence number that is incremented every time a
+ * scheduler is enabled. This can be used to check if any custom sched_ext
+ * scheduler has ever been used in the system.
+ */
+static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
+
 /*
  * The maximum amount of time in jiffies that a task may be runnable without
  * being scheduled on a CPU. If this timeout is exceeded, it will trigger
@@ -4154,11 +4161,19 @@ static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
 }
 SCX_ATTR(hotplug_seq);
 
+static ssize_t scx_attr_enable_seq_show(struct kobject *kobj,
+					struct kobj_attribute *ka, char *buf)
+{
+	return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_enable_seq));
+}
+SCX_ATTR(enable_seq);
+
 static struct attribute *scx_global_attrs[] = {
 	&scx_attr_state.attr,
 	&scx_attr_switch_all.attr,
 	&scx_attr_nr_rejected.attr,
 	&scx_attr_hotplug_seq.attr,
+	&scx_attr_enable_seq.attr,
 	NULL,
 };
 
@@ -4469,8 +4484,9 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
 
 		if (ei->msg[0] != '\0')
			pr_err("sched_ext: %s: %s\n", scx_ops.name, ei->msg);
-
+#ifdef CONFIG_STACKTRACE
 		stack_trace_print(ei->bt, ei->bt_len, 2);
+#endif
 	} else {
 		pr_info("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
 			scx_ops.name, ei->reason);
@@ -4847,10 +4863,10 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
 		return;
 
 	ei->exit_code = exit_code;
-
+#ifdef CONFIG_STACKTRACE
 	if (kind >= SCX_EXIT_ERROR)
 		ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);
-
+#endif
 	va_start(args, fmt);
 	vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
 	va_end(args);
@@ -5176,6 +5192,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	kobject_uevent(scx_root_kobj, KOBJ_ADD);
 	mutex_unlock(&scx_ops_enable_mutex);
 
+	atomic_long_inc(&scx_enable_seq);
+
 	return 0;
 
 err_del:

kernel/sched/sched.h

@@ -432,16 +432,17 @@ struct cfs_bandwidth {
 struct task_group {
 	struct cgroup_subsys_state css;
 
+#ifdef CONFIG_GROUP_SCHED_WEIGHT
+	/* A positive value indicates that this is a SCHED_IDLE group. */
+	int			idle;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each CPU */
 	struct sched_entity	**se;
 	/* runqueue "owned" by this group on each CPU */
 	struct cfs_rq		**cfs_rq;
 	unsigned long		shares;
-
-	/* A positive value indicates that this is a SCHED_IDLE group. */
-	int			idle;
-
 #ifdef CONFIG_SMP
 	/*
 	 * load_avg can be heavily contended at clock tick time, so put
@@ -582,6 +583,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
 #endif /* CONFIG_SMP */
 #else /* !CONFIG_FAIR_GROUP_SCHED */
 static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares) { return 0; }
+static inline int sched_group_set_idle(struct task_group *tg, long idle) { return 0; }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #else /* CONFIG_CGROUP_SCHED */

tools/sched_ext/scx_show_state.py

@@ -37,3 +37,4 @@ print(f'switched_all : {read_static_key("__scx_switched_all")}')
 print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
 print(f'bypass_depth : {read_atomic("scx_ops_bypass_depth")}')
 print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
+print(f'enable_seq : {read_atomic("scx_enable_seq")}')