2019-06-01 01:08:55 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-11-22 13:23:55 -07:00
|
|
|
/*
|
|
|
|
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
|
|
|
|
*
|
|
|
|
* (C) Copyright 2016 Intel Corporation
|
|
|
|
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
|
|
|
*
|
|
|
|
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
|
|
|
|
* the maximum turbo frequencies of some cores in a CPU package may be
|
|
|
|
* higher than for the other cores in the same package. In that case,
|
|
|
|
* better performance can be achieved by making the scheduler prefer
|
|
|
|
* to run tasks on the CPUs with higher max turbo frequencies.
|
|
|
|
*
|
|
|
|
* This file provides functions and data structures for enabling the
|
|
|
|
* scheduler to favor scheduling on cores that can be boosted to a higher
|
|
|
|
* frequency under ITMT.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/cpumask.h>
|
|
|
|
#include <linux/cpuset.h>
|
2016-11-28 01:43:49 -07:00
|
|
|
#include <linux/mutex.h>
|
2016-11-22 13:23:55 -07:00
|
|
|
#include <linux/sysctl.h>
|
|
|
|
#include <linux/nodemask.h>
|
|
|
|
|
|
|
|
static DEFINE_MUTEX(itmt_update_mutex);
|
|
|
|
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
|
|
|
|
|
|
|
|
/* Boolean to track if system has ITMT capabilities */
|
|
|
|
static bool __read_mostly sched_itmt_capable;
|
|
|
|
|
2016-11-22 13:23:56 -07:00
|
|
|
/*
|
|
|
|
* Boolean to control whether we want to move processes to cpu capable
|
|
|
|
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
|
|
|
|
* Technology 3.0.
|
|
|
|
*
|
|
|
|
* It can be set via /proc/sys/kernel/sched_itmt_enabled
|
|
|
|
*/
|
|
|
|
unsigned int __read_mostly sysctl_sched_itmt_enabled;
|
|
|
|
|
sysctl: treewide: constify the ctl_table argument of proc_handlers
const qualify the struct ctl_table argument in the proc_handler function
signatures. This is a prerequisite to moving the static ctl_table
structs into .rodata data which will ensure that proc_handler function
pointers cannot be modified.
This patch has been generated by the following coccinelle script:
```
virtual patch
@r1@
identifier ctl, write, buffer, lenp, ppos;
identifier func !~ "appldata_(timer|interval)_handler|sched_(rt|rr)_handler|rds_tcp_skbuf_handler|proc_sctp_do_(hmac_alg|rto_min|rto_max|udp_port|alpha_beta|auth|probe_interval)";
@@
int func(
- struct ctl_table *ctl
+ const struct ctl_table *ctl
,int write, void *buffer, size_t *lenp, loff_t *ppos);
@r2@
identifier func, ctl, write, buffer, lenp, ppos;
@@
int func(
- struct ctl_table *ctl
+ const struct ctl_table *ctl
,int write, void *buffer, size_t *lenp, loff_t *ppos)
{ ... }
@r3@
identifier func;
@@
int func(
- struct ctl_table *
+ const struct ctl_table *
,int , void *, size_t *, loff_t *);
@r4@
identifier func, ctl;
@@
int func(
- struct ctl_table *ctl
+ const struct ctl_table *ctl
,int , void *, size_t *, loff_t *);
@r5@
identifier func, write, buffer, lenp, ppos;
@@
int func(
- struct ctl_table *
+ const struct ctl_table *
,int write, void *buffer, size_t *lenp, loff_t *ppos);
```
* Code formatting was adjusted in xfs_sysctl.c to comply with code
conventions. The xfs_stats_clear_proc_handler,
xfs_panic_mask_proc_handler and xfs_deprecated_dointvec_minmax where
adjusted.
* The ctl_table argument in proc_watchdog_common was const qualified.
This is called from a proc_handler itself and is calling back into
another proc_handler, making it necessary to change it as part of the
proc_handler migration.
Co-developed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Co-developed-by: Joel Granados <j.granados@samsung.com>
Signed-off-by: Joel Granados <j.granados@samsung.com>
2024-07-24 11:59:29 -07:00
|
|
|
/*
 * proc handler behind the "sched_itmt_enabled" sysctl.
 *
 * Fails with -EINVAL while the platform is not flagged ITMT capable.
 * Otherwise the access is delegated to proc_dointvec_minmax(); when a
 * successful write actually changes sysctl_sched_itmt_enabled, a topology
 * update is requested and the sched domains are rebuilt.
 *
 * Returns -EINVAL as described above, else whatever proc_dointvec_minmax()
 * returned.
 */
static int sched_itmt_update_handler(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int prev_enabled;
	int err = -EINVAL;

	mutex_lock(&itmt_update_mutex);

	if (!sched_itmt_capable)
		goto out_unlock;

	/* Remember the current setting so a no-op write can be detected. */
	prev_enabled = sysctl_sched_itmt_enabled;

	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (err || !write)
		goto out_unlock;

	if (sysctl_sched_itmt_enabled != prev_enabled) {
		x86_topology_update = true;
		rebuild_sched_domains();
	}

out_unlock:
	mutex_unlock(&itmt_update_mutex);
	return err;
}
|
|
|
|
|
|
|
|
static struct ctl_table itmt_kern_table[] = {
|
|
|
|
{
|
|
|
|
.procname = "sched_itmt_enabled",
|
|
|
|
.data = &sysctl_sched_itmt_enabled,
|
|
|
|
.maxlen = sizeof(unsigned int),
|
|
|
|
.mode = 0644,
|
|
|
|
.proc_handler = sched_itmt_update_handler,
|
proc/sysctl: add shared variables for range check
In the sysctl code the proc_dointvec_minmax() function is often used to
validate the user supplied value between an allowed range. This
function uses the extra1 and extra2 members from struct ctl_table as
minimum and maximum allowed value.
On sysctl handler declaration, in every source file there are some
readonly variables containing just an integer which address is assigned
to the extra1 and extra2 members, so the sysctl range is enforced.
The special values 0, 1 and INT_MAX are very often used as range
boundary, leading duplication of variables like zero=0, one=1,
int_max=INT_MAX in different source files:
$ git grep -E '\.extra[12].*&(zero|one|int_max)' |wc -l
248
Add a const int array containing the most commonly used values, some
macros to refer more easily to the correct array member, and use them
instead of creating a local one for every object file.
This is the bloat-o-meter output comparing the old and new binary
compiled with the default Fedora config:
# scripts/bloat-o-meter -d vmlinux.o.old vmlinux.o
add/remove: 2/2 grow/shrink: 0/2 up/down: 24/-188 (-164)
Data old new delta
sysctl_vals - 12 +12
__kstrtab_sysctl_vals - 12 +12
max 14 10 -4
int_max 16 - -16
one 68 - -68
zero 128 28 -100
Total: Before=20583249, After=20583085, chg -0.00%
[mcroce@redhat.com: tipc: remove two unused variables]
Link: http://lkml.kernel.org/r/20190530091952.4108-1-mcroce@redhat.com
[akpm@linux-foundation.org: fix net/ipv6/sysctl_net_ipv6.c]
[arnd@arndb.de: proc/sysctl: make firmware loader table conditional]
Link: http://lkml.kernel.org/r/20190617130014.1713870-1-arnd@arndb.de
[akpm@linux-foundation.org: fix fs/eventpoll.c]
Link: http://lkml.kernel.org/r/20190430180111.10688-1-mcroce@redhat.com
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-18 15:58:50 -07:00
|
|
|
.extra1 = SYSCTL_ZERO,
|
|
|
|
.extra2 = SYSCTL_ONE,
|
2016-11-22 13:23:56 -07:00
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct ctl_table_header *itmt_sysctl_header;
|
|
|
|
|
2016-11-22 13:23:55 -07:00
|
|
|
/**
|
|
|
|
* sched_set_itmt_support() - Indicate platform supports ITMT
|
|
|
|
*
|
|
|
|
* This function is used by the OS to indicate to scheduler that the platform
|
|
|
|
* is capable of supporting the ITMT feature.
|
|
|
|
*
|
|
|
|
* The current scheme has the pstate driver detects if the system
|
|
|
|
* is ITMT capable and call sched_set_itmt_support.
|
|
|
|
*
|
|
|
|
* This must be done only after sched_set_itmt_core_prio
|
|
|
|
* has been called to set the cpus' priorities.
|
2016-11-22 13:23:56 -07:00
|
|
|
* It must not be called with cpu hot plug lock
|
|
|
|
* held as we need to acquire the lock to rebuild sched domains
|
|
|
|
* later.
|
|
|
|
*
|
|
|
|
* Return: 0 on success
|
2016-11-22 13:23:55 -07:00
|
|
|
*/
|
2016-11-22 13:23:56 -07:00
|
|
|
int sched_set_itmt_support(void)
|
2016-11-22 13:23:55 -07:00
|
|
|
{
|
|
|
|
mutex_lock(&itmt_update_mutex);
|
|
|
|
|
2016-11-22 13:23:56 -07:00
|
|
|
if (sched_itmt_capable) {
|
|
|
|
mutex_unlock(&itmt_update_mutex);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-03-10 16:32:48 -07:00
|
|
|
itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
|
2016-11-22 13:23:56 -07:00
|
|
|
if (!itmt_sysctl_header) {
|
|
|
|
mutex_unlock(&itmt_update_mutex);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2016-11-22 13:23:55 -07:00
|
|
|
sched_itmt_capable = true;
|
|
|
|
|
2016-11-22 13:23:56 -07:00
|
|
|
sysctl_sched_itmt_enabled = 1;
|
|
|
|
|
2017-01-18 15:30:29 -07:00
|
|
|
x86_topology_update = true;
|
|
|
|
rebuild_sched_domains();
|
2016-11-22 13:23:56 -07:00
|
|
|
|
2016-11-22 13:23:55 -07:00
|
|
|
mutex_unlock(&itmt_update_mutex);
|
2016-11-22 13:23:56 -07:00
|
|
|
|
|
|
|
return 0;
|
2016-11-22 13:23:55 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* sched_clear_itmt_support() - Revoke platform's support of ITMT
|
|
|
|
*
|
|
|
|
* This function is used by the OS to indicate that it has
|
|
|
|
* revoked the platform's support of ITMT feature.
|
|
|
|
*
|
2016-11-22 13:23:56 -07:00
|
|
|
* It must not be called with cpu hot plug lock
|
|
|
|
* held as we need to acquire the lock to rebuild sched domains
|
|
|
|
* later.
|
2016-11-22 13:23:55 -07:00
|
|
|
*/
|
|
|
|
void sched_clear_itmt_support(void)
|
|
|
|
{
|
|
|
|
mutex_lock(&itmt_update_mutex);
|
|
|
|
|
2016-11-22 13:23:56 -07:00
|
|
|
if (!sched_itmt_capable) {
|
|
|
|
mutex_unlock(&itmt_update_mutex);
|
|
|
|
return;
|
|
|
|
}
|
2016-11-22 13:23:55 -07:00
|
|
|
sched_itmt_capable = false;
|
|
|
|
|
2016-11-22 13:23:56 -07:00
|
|
|
if (itmt_sysctl_header) {
|
|
|
|
unregister_sysctl_table(itmt_sysctl_header);
|
|
|
|
itmt_sysctl_header = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sysctl_sched_itmt_enabled) {
|
|
|
|
/* disable sched_itmt if we are no longer ITMT capable */
|
|
|
|
sysctl_sched_itmt_enabled = 0;
|
|
|
|
x86_topology_update = true;
|
|
|
|
rebuild_sched_domains();
|
|
|
|
}
|
|
|
|
|
2016-11-22 13:23:55 -07:00
|
|
|
mutex_unlock(&itmt_update_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
int arch_asym_cpu_priority(int cpu)
|
|
|
|
{
|
|
|
|
return per_cpu(sched_core_priority, cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
|
2023-04-06 13:31:47 -07:00
|
|
|
* @prio: Priority of @cpu
|
|
|
|
* @cpu: The CPU number
|
2016-11-22 13:23:55 -07:00
|
|
|
*
|
|
|
|
* The pstate driver will find out the max boost frequency
|
|
|
|
* and call this function to set a priority proportional
|
2023-04-06 13:31:47 -07:00
|
|
|
* to the max boost frequency. CPUs with higher boost
|
2016-11-22 13:23:55 -07:00
|
|
|
* frequency will receive higher priority.
|
|
|
|
*
|
|
|
|
* No need to rebuild sched domain after updating
|
|
|
|
* the CPU priorities. The sched domains have no
|
|
|
|
* dependency on CPU priorities.
|
|
|
|
*/
|
2023-04-06 13:31:47 -07:00
|
|
|
void sched_set_itmt_core_prio(int prio, int cpu)
|
2016-11-22 13:23:55 -07:00
|
|
|
{
|
2023-04-06 13:31:47 -07:00
|
|
|
per_cpu(sched_core_priority, cpu) = prio;
|
2016-11-22 13:23:55 -07:00
|
|
|
}
|