Scheduler changes for v6.11:

  - Update Daniel Bristot de Oliveira's entry in MAINTAINERS,
    and credit him in CREDITS.
 
  - Harmonize the lock-yielding behavior on dynamically selected
    preemption models with static ones.
 
  - Reorganize the code a bit: split out sched/syscalls.c to reduce
    the size of sched/core.c
 
  - Micro-optimize psi_group_change()
 
  - Fix set_load_weight() for SCHED_IDLE tasks
 
  - Misc cleanups & fixes
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmaVtVARHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1iqTQ/9GLNzNBnl0oBWCiybeQjyWsZ6BiZi48R0
 C1g9/RKy++OyGOjn/yqYK0Kg8cdfoGzHGioMMAucHFW1nXZwVw17xAJK127N0apF
 83up7AnFJw/JGr1bI0FwuozqHAs4Z5KzHTv2KBxhYuO77lyYna6/t0liRUbF8ZUZ
 I/nqav7wDB8RBIB5hEJ/uYLDX7qWdUlyFB+mcvV4ANA99yr++OgipCp6Ob3Rz3cP
 O676nKJY4vpNbZ/B6bpKg8ezULRP8re2qD3GJRf2huS63uu/Z5ct7ouLVZ1DwN53
 mFDBTYUMI2ToV0pseikuqwnmrjxAKcEajTyZpD3vckafd2TlWIopkQZoQ9XLLlIZ
 DxO+KoekaHTSVy8FWlO8O+iE3IAdUUgECEpNveX45Pb7nFP+5dtFqqnVIdNqCq5e
 zEuQvizaa5m+A1POZhZKya+z9jbLXXx+gtPCbbADTBWtuyl8azUIh3vjn0bykmv4
 IVV/wvUm+BPEIhnKusZZOgB0vLtxUdntBBfUSxqoSOad9L+0/UtSKoKI6wvW00q8
 ZkW+85yS3YFiN9W61276RLis2j7OAjE0eDJ96wfhooma2JRDJU4Wmg5oWg8x3WuA
 JRmK0s63Qik5gpwG5rHQsR5jNqYWTj5Lp7So+M1kRfFsOM/RXQ/AneSXZu/P7d65
 LnYWzbKu76c=
 =lLab
 -----END PGP SIGNATURE-----

Merge tag 'sched-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Update Daniel Bristot de Oliveira's entry in MAINTAINERS,
   and credit him in CREDITS

 - Harmonize the lock-yielding behavior on dynamically selected
   preemption models with static ones (an illustrative sketch follows
   this list)

 - Reorganize the code a bit: split out sched/syscalls.c to reduce
   the size of sched/core.c

 - Micro-optimize psi_group_change()

 - Fix set_load_weight() for SCHED_IDLE tasks

 - Misc cleanups & fixes
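
For illustration only (editorial, not part of Ingo's message or of the
series): a hedged sketch of the classic lock-yielding loop that the
"harmonize" item refers to. With this series, on CONFIG_PREEMPT_DYNAMIC
kernels spin_needbreak() honours the preemption model selected at boot
(preempt=none/voluntary/full) rather than the build-time
CONFIG_PREEMPTION switch. 'struct my_ctx' and handle_one() are
hypothetical names; spin_needbreak(), cpu_relax() and the list/spinlock
APIs are existing kernel interfaces.

	#include <linux/spinlock.h>
	#include <linux/list.h>

	struct my_ctx {				/* hypothetical */
		spinlock_t lock;
		struct list_head todo;
	};

	static void handle_one(struct my_ctx *ctx);	/* hypothetical */

	static void process_items(struct my_ctx *ctx)
	{
		spin_lock(&ctx->lock);
		while (!list_empty(&ctx->todo)) {
			handle_one(ctx);
			/*
			 * Reports contention only when the (boot-selected)
			 * model is preemptible; on none/voluntary it is 0,
			 * matching the old static-CONFIG_PREEMPTION behavior.
			 */
			if (spin_needbreak(&ctx->lock)) {
				spin_unlock(&ctx->lock);
				cpu_relax();
				spin_lock(&ctx->lock);
			}
		}
		spin_unlock(&ctx->lock);
	}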

* tag 'sched-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Update MAINTAINERS and CREDITS
  sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks
  sched/psi: Optimise psi_group_change a bit
  sched/core: Drop spinlocks on contention iff kernel is preemptible
  sched/core: Move preempt_model_*() helpers from sched.h to preempt.h
  sched/balance: Skip unnecessary updates to idle load balancer's flags
  idle: Remove stale RCU comment
  sched/headers: Move struct pre-declarations to the beginning of the header
  sched/core: Clean up kernel/sched/sched.h a bit
  sched/core: Simplify prefetch_curr_exec_start()
  sched: Fix spelling in comments
  sched/syscalls: Split out kernel/sched/syscalls.c from kernel/sched/core.c
Linus Torvalds 2024-07-16 17:00:50 -07:00
commit 4a996d90b9
23 changed files with 2183 additions and 2095 deletions

@ -271,6 +271,9 @@ D: Driver for WaveFront soundcards (Turtle Beach Maui, Tropez, Tropez+)
D: Various bugfixes and changes to sound drivers
S: USA
N: Daniel Bristot de Oliveira
D: Scheduler contributions, notably: SCHED_DEADLINE
N: Carlos Henrique Bauer
E: chbauer@acm.org
E: bauer@atlas.unisinos.br

@ -4728,7 +4728,9 @@
none - Limited to cond_resched() calls
voluntary - Limited to cond_resched() and might_sleep() calls
full - Any section that isn't explicitly preempt disabled
can be preempted anytime.
can be preempted anytime. Tasks will also yield
contended spinlocks (if the critical section isn't
explicitly preempt disabled beyond the lock itself).
print-fatal-signals=
[KNL] debug: print fatal signals

@ -20047,7 +20047,6 @@ R: Dietmar Eggemann <dietmar.eggemann@arm.com> (SCHED_NORMAL)
R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
R: Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
L: linux-kernel@vger.kernel.org
S: Maintained

@ -481,4 +481,45 @@ DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
#ifdef CONFIG_PREEMPT_DYNAMIC
extern bool preempt_model_none(void);
extern bool preempt_model_voluntary(void);
extern bool preempt_model_full(void);
#else
static inline bool preempt_model_none(void)
{
return IS_ENABLED(CONFIG_PREEMPT_NONE);
}
static inline bool preempt_model_voluntary(void)
{
return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
}
static inline bool preempt_model_full(void)
{
return IS_ENABLED(CONFIG_PREEMPT);
}
#endif
static inline bool preempt_model_rt(void)
{
return IS_ENABLED(CONFIG_PREEMPT_RT);
}
/*
* Does the preemption model allow non-cooperative preemption?
*
* For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
* CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
* kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
* PREEMPT_NONE model.
*/
static inline bool preempt_model_preemptible(void)
{
return preempt_model_full() || preempt_model_rt();
}
#endif /* __LINUX_PREEMPT_H */
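
Editorial aside: a minimal, hedged sketch of how code can consume the
helpers added above. On PREEMPT_DYNAMIC kernels they report the model
chosen at boot rather than the build-time config, which is what the
spin_needbreak()/rwlock_needbreak() change further below relies on.
maybe_yield() is a hypothetical example function, not from this series.

	#include <linux/preempt.h>
	#include <linux/sched.h>

	static void maybe_yield(void)
	{
		/*
		 * Fully preemptible models (full/RT) can interrupt us
		 * anyway; only cooperative models need an explicit
		 * scheduling point.
		 */
		if (!preempt_model_preemptible())
			cond_resched();
	}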

@ -2064,47 +2064,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
__cond_resched_rwlock_write(lock); \
})
#ifdef CONFIG_PREEMPT_DYNAMIC
extern bool preempt_model_none(void);
extern bool preempt_model_voluntary(void);
extern bool preempt_model_full(void);
#else
static inline bool preempt_model_none(void)
{
return IS_ENABLED(CONFIG_PREEMPT_NONE);
}
static inline bool preempt_model_voluntary(void)
{
return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
}
static inline bool preempt_model_full(void)
{
return IS_ENABLED(CONFIG_PREEMPT);
}
#endif
static inline bool preempt_model_rt(void)
{
return IS_ENABLED(CONFIG_PREEMPT_RT);
}
/*
* Does the preemption model allow non-cooperative preemption?
*
* For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
* CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
* kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
* PREEMPT_NONE model.
*/
static inline bool preempt_model_preemptible(void)
{
return preempt_model_full() || preempt_model_rt();
}
static __always_inline bool need_resched(void)
{
return unlikely(tif_need_resched());

@ -462,11 +462,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
*/
static inline int spin_needbreak(spinlock_t *lock)
{
#ifdef CONFIG_PREEMPTION
return spin_is_contended(lock);
#else
if (!preempt_model_preemptible())
return 0;
#endif
return spin_is_contended(lock);
}
/*
@ -479,11 +478,10 @@ static inline int spin_needbreak(spinlock_t *lock)
*/
static inline int rwlock_needbreak(rwlock_t *lock)
{
#ifdef CONFIG_PREEMPTION
return rwlock_is_contended(lock);
#else
if (!preempt_model_preemptible())
return 0;
#endif
return rwlock_is_contended(lock);
}
/*

@ -52,3 +52,4 @@
#include "cputime.c"
#include "deadline.c"
#include "syscalls.c"

@ -444,7 +444,7 @@ notrace void sched_clock_tick_stable(void)
}
/*
* We are going deep-idle (irqs are disabled):
* We are going deep-idle (IRQs are disabled):
*/
notrace void sched_clock_idle_sleep_event(void)
{

File diff suppressed because it is too large.

@ -279,7 +279,7 @@ void __sched_core_account_forceidle(struct rq *rq)
continue;
/*
* Note: this will account forceidle to the current cpu, even
* Note: this will account forceidle to the current CPU, even
* if it comes from our SMT sibling.
*/
__account_forceidle_time(p, delta);

@ -14,11 +14,11 @@
* They are only modified in vtime_account, on corresponding CPU
* with interrupts disabled. So, writes are safe.
* They are read and saved off onto struct rq in update_rq_clock().
* This may result in other CPU reading this CPU's irq time and can
* This may result in other CPU reading this CPU's IRQ time and can
* race with irq/vtime_account on this CPU. We would either get old
* or new value with a side effect of accounting a slice of irq time to wrong
* task when irq is in progress while we read rq->clock. That is a worthy
* compromise in place of having locks on each irq in account_system_time.
* or new value with a side effect of accounting a slice of IRQ time to wrong
* task when IRQ is in progress while we read rq->clock. That is a worthy
* compromise in place of having locks on each IRQ in account_system_time.
*/
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
@ -269,7 +269,7 @@ static __always_inline u64 steal_account_process_time(u64 maxtime)
}
/*
* Account how much elapsed time was spent in steal, irq, or softirq time.
* Account how much elapsed time was spent in steal, IRQ, or softirq time.
*/
static inline u64 account_other_time(u64 max)
{
@ -370,7 +370,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
* Check for hardirq is done both for system and user time as there is
* no timer going off while we are on hardirq and hence we may never get an
* opportunity to update it solely in system time.
* p->stime and friends are only updated on system time and not on irq
* p->stime and friends are only updated on system time and not on IRQ
* softirq as those do not count in task exec_runtime any more.
*/
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
@ -380,7 +380,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
/*
* When returning from idle, many ticks can get accounted at
* once, including some ticks of steal, irq, and softirq time.
* once, including some ticks of steal, IRQ, and softirq time.
* Subtract those ticks from the amount of time accounted to
* idle, or potentially user or system time. Due to rounding,
* other time can exceed ticks occasionally.

@ -992,7 +992,7 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
* is detected, the runtime and deadline need to be updated.
*
* If the task has an implicit deadline, i.e., deadline == period, the Original
* CBS is applied. the runtime is replenished and a new absolute deadline is
* CBS is applied. The runtime is replenished and a new absolute deadline is
* set, as in the previous cases.
*
* However, the Original CBS does not work properly for tasks with
@ -1294,7 +1294,7 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
* by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
* Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
* is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
* is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
* Since delta is a 64 bit variable, to have an overflow its value should be
* larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
* not an issue here.
@ -2493,7 +2493,7 @@ static void pull_dl_task(struct rq *this_rq)
src_rq = cpu_rq(cpu);
/*
* It looks racy, abd it is! However, as in sched_rt.c,
* It looks racy, and it is! However, as in sched_rt.c,
* we are fine with this.
*/
if (this_rq->dl.dl_nr_running &&

@ -61,7 +61,7 @@
* Options are:
*
* SCHED_TUNABLESCALING_NONE - unscaled, always *1
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
* SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus)
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
*
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@ -3835,15 +3835,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
}
}
void reweight_task(struct task_struct *p, int prio)
void reweight_task(struct task_struct *p, const struct load_weight *lw)
{
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
struct load_weight *load = &se->load;
unsigned long weight = scale_load(sched_prio_to_weight[prio]);
reweight_entity(cfs_rq, se, weight);
load->inv_weight = sched_prio_to_wmult[prio];
reweight_entity(cfs_rq, se, lw->weight);
load->inv_weight = lw->inv_weight;
}
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
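
Editorial note on the new reweight_task() signature above, tied to the
"set_load_weight() must also call reweight_task() for SCHED_IDLE tasks"
commit in the shortlog: the actual caller sits in a diff that is
suppressed on this page, so the following is only a hedged
approximation of what set_load_weight() looks like with the new
interface, not the verbatim patch.

	void set_load_weight(struct task_struct *p, bool update_load)
	{
		int prio = p->static_prio - MAX_RT_PRIO;
		struct load_weight lw;

		if (task_has_idle_policy(p)) {
			/* SCHED_IDLE tasks get the fixed idle weight ... */
			lw.weight = scale_load(WEIGHT_IDLEPRIO);
			lw.inv_weight = WMULT_IDLEPRIO;
		} else {
			lw.weight = scale_load(sched_prio_to_weight[prio]);
			lw.inv_weight = sched_prio_to_wmult[prio];
		}

		/*
		 * ... and, with the fix, also go through reweight_task()
		 * so an already-queued entity is reweighted instead of
		 * being skipped by an early return.
		 */
		if (update_load && p->sched_class == &fair_sched_class)
			reweight_task(p, &lw);
		else
			p->se.load = lw;
	}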
@ -8719,7 +8718,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
* topology where each level pairs two lower groups (or better). This results
* in O(log n) layers. Furthermore we reduce the number of CPUs going up the
* tree to only the first of the previous level and we decrease the frequency
* of load-balance at each level inv. proportional to the number of CPUs in
* of load-balance at each level inversely proportional to the number of CPUs in
* the groups.
*
* This yields:
@ -11885,6 +11884,13 @@ static void kick_ilb(unsigned int flags)
if (ilb_cpu < 0)
return;
/*
* Don't bother if no new NOHZ balance work items for ilb_cpu,
* i.e. all bits in flags are already set in ilb_cpu.
*/
if ((atomic_read(nohz_flags(ilb_cpu)) & flags) == flags)
return;
/*
* Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
* the first flag owns it; cleared by nohz_csd_func().

@ -172,19 +172,13 @@ static void cpuidle_idle_call(void)
/*
* Check if the idle task must be rescheduled. If it is the
* case, exit the function after re-enabling the local irq.
* case, exit the function after re-enabling the local IRQ.
*/
if (need_resched()) {
local_irq_enable();
return;
}
/*
* The RCU framework needs to be told that we are entering an idle
* section, so no more rcu read side critical sections and one more
* step to the grace period
*/
if (cpuidle_not_available(drv, dev)) {
tick_nohz_idle_stop_tick();
@ -244,7 +238,7 @@ exit_idle:
__current_set_polling();
/*
* It is up to the idle functions to reenable local interrupts
* It is up to the idle functions to re-enable local interrupts
*/
if (WARN_ON_ONCE(irqs_disabled()))
local_irq_enable();
@ -320,7 +314,7 @@ static void do_idle(void)
rcu_nocb_flush_deferred_wakeup();
/*
* In poll mode we reenable interrupts and spin. Also if we
* In poll mode we re-enable interrupts and spin. Also if we
* detected in the wakeup from idle path that the tick
* broadcast device expired for us, we don't want to go deep
* idle as we know that the IPI is going to arrive right away.

@ -45,7 +45,7 @@
* again, being late doesn't loose the delta, just wrecks the sample.
*
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
* this would add another cross-CPU cacheline miss and atomic operation
* this would add another cross-CPU cache-line miss and atomic operation
* to the wakeup path. Instead we increment on whatever CPU the task ran
* when it went into uninterruptible state and decrement on whatever CPU
* did the wakeup. This means that only the sum of nr_uninterruptible over
@ -62,7 +62,7 @@ EXPORT_SYMBOL(avenrun); /* should be removed */
/**
* get_avenrun - get the load average array
* @loads: pointer to dest load array
* @loads: pointer to destination load array
* @offset: offset to add
* @shift: shift count to shift the result left
*

@ -417,7 +417,7 @@ int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity)
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
/*
* irq:
* IRQ:
*
* util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
* util_sum = cpu_scale * load_sum
@ -432,7 +432,7 @@ int update_irq_load_avg(struct rq *rq, u64 running)
int ret = 0;
/*
* We can't use clock_pelt because irq time is not accounted in
* We can't use clock_pelt because IRQ time is not accounted in
* clock_task. Instead we directly scale the running time to
* reflect the real amount of computation
*/

@ -41,7 +41,7 @@
* What it means for a task to be productive is defined differently
* for each resource. For IO, productive means a running task. For
* memory, productive means a running task that isn't a reclaimer. For
* CPU, productive means an oncpu task.
* CPU, productive means an on-CPU task.
*
* Naturally, the FULL state doesn't exist for the CPU resource at the
* system level, but exist at the cgroup level. At the cgroup level,
@ -218,28 +218,32 @@ void __init psi_init(void)
group_init(&psi_system);
}
static bool test_state(unsigned int *tasks, enum psi_states state, bool oncpu)
static u32 test_states(unsigned int *tasks, u32 state_mask)
{
switch (state) {
case PSI_IO_SOME:
return unlikely(tasks[NR_IOWAIT]);
case PSI_IO_FULL:
return unlikely(tasks[NR_IOWAIT] && !tasks[NR_RUNNING]);
case PSI_MEM_SOME:
return unlikely(tasks[NR_MEMSTALL]);
case PSI_MEM_FULL:
return unlikely(tasks[NR_MEMSTALL] &&
tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);
case PSI_CPU_SOME:
return unlikely(tasks[NR_RUNNING] > oncpu);
case PSI_CPU_FULL:
return unlikely(tasks[NR_RUNNING] && !oncpu);
case PSI_NONIDLE:
return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
tasks[NR_RUNNING];
default:
return false;
const bool oncpu = state_mask & PSI_ONCPU;
if (tasks[NR_IOWAIT]) {
state_mask |= BIT(PSI_IO_SOME);
if (!tasks[NR_RUNNING])
state_mask |= BIT(PSI_IO_FULL);
}
if (tasks[NR_MEMSTALL]) {
state_mask |= BIT(PSI_MEM_SOME);
if (tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING])
state_mask |= BIT(PSI_MEM_FULL);
}
if (tasks[NR_RUNNING] > oncpu)
state_mask |= BIT(PSI_CPU_SOME);
if (tasks[NR_RUNNING] && !oncpu)
state_mask |= BIT(PSI_CPU_FULL);
if (tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] || tasks[NR_RUNNING])
state_mask |= BIT(PSI_NONIDLE);
return state_mask;
}
static void get_recent_times(struct psi_group *group, int cpu,
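
A brief editorial worked example of the new test_states() helper, using
assumed task counts (not taken from the patch):

	/*
	 * One memstall task, currently running on the CPU, no iowait:
	 *
	 *   tasks[NR_RUNNING]          = 1
	 *   tasks[NR_IOWAIT]           = 0
	 *   tasks[NR_MEMSTALL]         = 1
	 *   tasks[NR_MEMSTALL_RUNNING] = 1
	 *   state_mask (input)         = PSI_ONCPU
	 *
	 * test_states() returns PSI_ONCPU | BIT(PSI_MEM_SOME) |
	 * BIT(PSI_MEM_FULL) | BIT(PSI_NONIDLE): memory pressure is both
	 * SOME and FULL (the only runnable task is the memstall victim),
	 * the CPU is not contended (nr_running does not exceed the
	 * on-CPU task), and the group is non-idle.
	 */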
@ -770,7 +774,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
{
struct psi_group_cpu *groupc;
unsigned int t, m;
enum psi_states s;
u32 state_mask;
lockdep_assert_rq_held(cpu_rq(cpu));
@ -842,10 +845,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
return;
}
for (s = 0; s < NR_PSI_STATES; s++) {
if (test_state(groupc->tasks, s, state_mask & PSI_ONCPU))
state_mask |= (1 << s);
}
state_mask = test_states(groupc->tasks, state_mask);
/*
* Since we care about lost potential, a memstall is FULL
@ -1205,7 +1205,7 @@ void psi_cgroup_restart(struct psi_group *group)
/*
* After we disable psi_group->enabled, we don't actually
* stop percpu tasks accounting in each psi_group_cpu,
* instead only stop test_state() loop, record_times()
* instead only stop test_states() loop, record_times()
* and averaging worker, see psi_group_change() for details.
*
* When disable cgroup PSI, this function has nothing to sync
@ -1213,7 +1213,7 @@ void psi_cgroup_restart(struct psi_group *group)
* would see !psi_group->enabled and only do task accounting.
*
* When re-enable cgroup PSI, this function use psi_group_change()
* to get correct state mask from test_state() loop on tasks[],
* to get correct state mask from test_states() loop on tasks[],
* and restart groupc->state_start from now, use .clear = .set = 0
* here since no task status really changed.
*/

@ -140,7 +140,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
INIT_LIST_HEAD(array->queue + i);
__clear_bit(i, array->bitmap);
}
/* delimiter for bitsearch: */
/* delimiter for bit-search: */
__set_bit(MAX_RT_PRIO, array->bitmap);
#if defined CONFIG_SMP
@ -1135,7 +1135,7 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
/*
* This may have been our highest task, and therefore
* we may have some recomputation to do
* we may have some re-computation to do
*/
if (prio == prev_prio) {
struct rt_prio_array *array = &rt_rq->active;
@ -1571,7 +1571,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
*
* For equal prio tasks, we just let the scheduler sort it out.
*
* Otherwise, just let it ride on the affined RQ and the
* Otherwise, just let it ride on the affine RQ and the
* post-schedule router will push the preempted task away
*
* This test is optimistic, if we get it wrong the load-balancer
@ -2147,14 +2147,14 @@ static void push_rt_tasks(struct rq *rq)
* if its the only CPU with multiple RT tasks queued, and a large number
* of CPUs scheduling a lower priority task at the same time.
*
* Each root domain has its own irq work function that can iterate over
* Each root domain has its own IRQ work function that can iterate over
* all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
* task must be checked if there's one or many CPUs that are lowering
* their priority, there's a single irq work iterator that will try to
* their priority, there's a single IRQ work iterator that will try to
* push off RT tasks that are waiting to run.
*
* When a CPU schedules a lower priority task, it will kick off the
* irq work iterator that will jump to each CPU with overloaded RT tasks.
* IRQ work iterator that will jump to each CPU with overloaded RT tasks.
* As it only takes the first CPU that schedules a lower priority task
* to start the process, the rto_start variable is incremented and if
* the atomic result is one, then that CPU will try to take the rto_lock.
@ -2162,7 +2162,7 @@ static void push_rt_tasks(struct rq *rq)
* CPUs scheduling lower priority tasks.
*
* All CPUs that are scheduling a lower priority task will increment the
* rt_loop_next variable. This will make sure that the irq work iterator
* rt_loop_next variable. This will make sure that the IRQ work iterator
* checks all RT overloaded CPUs whenever a CPU schedules a new lower
* priority task, even if the iterator is in the middle of a scan. Incrementing
* the rt_loop_next will cause the iterator to perform another scan.
@ -2242,7 +2242,7 @@ static void tell_cpu_to_push(struct rq *rq)
* The rto_cpu is updated under the lock, if it has a valid CPU
* then the IPI is still running and will continue due to the
* update to loop_next, and nothing needs to be done here.
* Otherwise it is finishing up and an ipi needs to be sent.
* Otherwise it is finishing up and an IPI needs to be sent.
*/
if (rq->rd->rto_cpu < 0)
cpu = rto_next_cpu(rq->rd);
@ -2594,7 +2594,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
watchdog(rq, p);
/*
* RR tasks need a special form of timeslice management.
* RR tasks need a special form of time-slice management.
* FIFO tasks have no timeslices.
*/
if (p->policy != SCHED_RR)
@ -2900,7 +2900,7 @@ static int sched_rt_global_constraints(void)
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
/* Don't accept realtime tasks when there is no way for them to run */
/* Don't accept real-time tasks when there is no way for them to run */
if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
return 0;
@ -3001,7 +3001,7 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
ret = proc_dointvec(table, write, buffer, lenp, ppos);
/*
* Make sure that internally we keep jiffies.
* Also, writing zero resets the timeslice to default:
* Also, writing zero resets the time-slice to default:
*/
if (!ret && write) {
sched_rr_timeslice =

@ -74,6 +74,12 @@
#include "../workqueue_internal.h"
struct rq;
struct cfs_rq;
struct rt_rq;
struct sched_group;
struct cpuidle_state;
#ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
# include <asm/paravirt_api_clock.h>
@ -90,9 +96,6 @@
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
#endif
struct rq;
struct cpuidle_state;
/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2
@ -128,12 +131,12 @@ extern struct list_head asym_cap_list;
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
#define NS_TO_JIFFIES(time) ((unsigned long)(time) / (NSEC_PER_SEC/HZ))
/*
* Increase resolution of nice-level calculations for 64-bit architectures.
* The extra resolution improves shares distribution and load balancing of
* low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
* low-weight task groups (eg. nice +19 on an autogroup), deeper task-group
* hierarchies, especially on larger systems. This is not a user-visible change
* and does not change the user-interface for setting shares/weights.
*
@ -150,6 +153,7 @@ extern struct list_head asym_cap_list;
# define scale_load_down(w) \
({ \
unsigned long __w = (w); \
\
if (__w) \
__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
__w; \
@ -187,6 +191,7 @@ static inline int idle_policy(int policy)
{
return policy == SCHED_IDLE;
}
static inline int fair_policy(int policy)
{
return policy == SCHED_NORMAL || policy == SCHED_BATCH;
@ -201,6 +206,7 @@ static inline int dl_policy(int policy)
{
return policy == SCHED_DEADLINE;
}
static inline bool valid_policy(int policy)
{
return idle_policy(policy) || fair_policy(policy) ||
@ -227,6 +233,7 @@ static inline int task_has_dl_policy(struct task_struct *p)
static inline void update_avg(u64 *avg, u64 sample)
{
s64 diff = sample - *avg;
*avg += diff / 8;
}
@ -358,9 +365,6 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
#ifdef CONFIG_CGROUP_SCHED
struct cfs_rq;
struct rt_rq;
extern struct list_head task_groups;
struct cfs_bandwidth {
@ -406,7 +410,7 @@ struct task_group {
#ifdef CONFIG_SMP
/*
* load_avg can be heavily contended at clock tick time, so put
* it in its own cacheline separated from the fields above which
* it in its own cache-line separated from the fields above which
* will also be accessed at each tick.
*/
atomic_long_t load_avg ____cacheline_aligned;
@ -536,6 +540,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
#else /* CONFIG_CGROUP_SCHED */
struct cfs_bandwidth { };
static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; }
#endif /* CONFIG_CGROUP_SCHED */
@ -803,6 +808,7 @@ struct dl_rq {
};
#ifdef CONFIG_FAIR_GROUP_SCHED
/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se) (!se->my_q)
@ -820,7 +826,8 @@ static inline long se_runnable(struct sched_entity *se)
return se->runnable_weight;
}
#else
#else /* !CONFIG_FAIR_GROUP_SCHED: */
#define entity_is_task(se) 1
static inline void se_update_runnable(struct sched_entity *se) { }
@ -829,7 +836,8 @@ static inline long se_runnable(struct sched_entity *se)
{
return !!se->on_rq;
}
#endif
#endif /* !CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_SMP
/*
@ -874,7 +882,7 @@ struct root_domain {
*/
bool overloaded;
/* Indicate one or more cpus over-utilized (tipping point) */
/* Indicate one or more CPUs over-utilized (tipping point) */
bool overutilized;
/*
@ -988,7 +996,6 @@ struct uclamp_rq {
DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
#endif /* CONFIG_UCLAMP_TASK */
struct rq;
struct balance_callback {
struct balance_callback *next;
void (*func)(struct rq *rq);
@ -1166,7 +1173,7 @@ struct rq {
#endif
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
/* Must be inspected within a RCU lock section */
struct cpuidle_state *idle_state;
#endif
@ -1247,7 +1254,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
struct sched_group;
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
@ -1283,9 +1289,10 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
return &rq->__lock;
}
bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
bool fi);
void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
extern bool
cfs_prio_less(const struct task_struct *a, const struct task_struct *b, bool fi);
extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
/*
* Helpers to check if the CPU's core cookie matches with the task's cookie
@ -1353,7 +1360,7 @@ extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
extern void sched_core_get(void);
extern void sched_core_put(void);
#else /* !CONFIG_SCHED_CORE */
#else /* !CONFIG_SCHED_CORE: */
static inline bool sched_core_enabled(struct rq *rq)
{
@ -1391,7 +1398,8 @@ static inline bool sched_group_cookie_match(struct rq *rq,
{
return true;
}
#endif /* CONFIG_SCHED_CORE */
#endif /* !CONFIG_SCHED_CORE */
static inline void lockdep_assert_rq_held(struct rq *rq)
{
@ -1422,8 +1430,10 @@ static inline void raw_spin_rq_unlock_irq(struct rq *rq)
static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
{
unsigned long flags;
local_irq_save(flags);
raw_spin_rq_lock(rq);
return flags;
}
@ -1452,6 +1462,7 @@ static inline void update_idle_core(struct rq *rq) { }
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
static inline struct task_struct *task_of(struct sched_entity *se)
{
SCHED_WARN_ON(!entity_is_task(se));
@ -1475,7 +1486,7 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
return grp->my_q;
}
#else
#else /* !CONFIG_FAIR_GROUP_SCHED: */
#define task_of(_se) container_of(_se, struct task_struct, se)
@ -1497,7 +1508,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
return NULL;
}
#endif
#endif /* !CONFIG_FAIR_GROUP_SCHED */
extern void update_rq_clock(struct rq *rq);
@ -1651,9 +1663,11 @@ static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
#endif
}
extern
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(rq->lock);
extern
struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(p->pi_lock)
__acquires(rq->lock);
@ -1680,48 +1694,42 @@ DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct,
task_rq_unlock(_T->rq, _T->lock, &_T->rf),
struct rq *rq; struct rq_flags rf)
static inline void
rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_rq_lock_irqsave(rq, rf->flags);
rq_pin_lock(rq, rf);
}
static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
static inline void rq_lock_irq(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_rq_lock_irq(rq);
rq_pin_lock(rq, rf);
}
static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
static inline void rq_lock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_rq_lock(rq);
rq_pin_lock(rq, rf);
}
static inline void
rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
static inline void rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
raw_spin_rq_unlock_irqrestore(rq, rf->flags);
}
static inline void
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
static inline void rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
raw_spin_rq_unlock_irq(rq);
}
static inline void
rq_unlock(struct rq *rq, struct rq_flags *rf)
static inline void rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
@ -1743,8 +1751,7 @@ DEFINE_LOCK_GUARD_1(rq_lock_irqsave, struct rq,
rq_unlock_irqrestore(_T->lock, &_T->rf),
struct rq_flags rf)
static inline struct rq *
this_rq_lock_irq(struct rq_flags *rf)
static inline struct rq *this_rq_lock_irq(struct rq_flags *rf)
__acquires(rq->lock)
{
struct rq *rq;
@ -1752,15 +1759,18 @@ this_rq_lock_irq(struct rq_flags *rf)
local_irq_disable();
rq = this_rq();
rq_lock(rq, rf);
return rq;
}
#ifdef CONFIG_NUMA
enum numa_topology_type {
NUMA_DIRECT,
NUMA_GLUELESS_MESH,
NUMA_BACKPLANE,
};
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
@ -1769,18 +1779,23 @@ extern void sched_update_numa(int cpu, bool online);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
#else /* !CONFIG_NUMA: */
static inline void sched_init_numa(int offline_node) { }
static inline void sched_update_numa(int cpu, bool online) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
{
return nr_cpu_ids;
}
#endif
#endif /* !CONFIG_NUMA */
#ifdef CONFIG_NUMA_BALANCING
/* The regions in numa_faults array from task_struct */
enum numa_faults_stats {
NUMA_MEM = 0,
@ -1788,17 +1803,21 @@ enum numa_faults_stats {
NUMA_MEMBUF,
NUMA_CPUBUF
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
#else
#else /* !CONFIG_NUMA_BALANCING: */
static inline void
init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
{
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* !CONFIG_NUMA_BALANCING */
#ifdef CONFIG_SMP
@ -1823,8 +1842,7 @@ queue_balance_callback(struct rq *rq,
}
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
rcu_dereference_check((p), lockdep_is_held(&sched_domains_mutex))
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@ -1895,6 +1913,7 @@ DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
extern struct static_key_false sched_asym_cpucapacity;
extern struct static_key_false sched_cluster_active;
@ -1958,15 +1977,11 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg)
extern int group_balance_cpu(struct sched_group *sg);
#ifdef CONFIG_SCHED_DEBUG
void update_sched_domain_debugfs(void);
void dirty_sched_domain_sysctl(int cpu);
extern void update_sched_domain_debugfs(void);
extern void dirty_sched_domain_sysctl(int cpu);
#else
static inline void update_sched_domain_debugfs(void)
{
}
static inline void dirty_sched_domain_sysctl(int cpu)
{
}
static inline void update_sched_domain_debugfs(void) { }
static inline void dirty_sched_domain_sysctl(int cpu) { }
#endif
extern int sched_update_scaling(void);
@ -1977,6 +1992,7 @@ static inline const struct cpumask *task_user_cpus(struct task_struct *p)
return cpu_possible_mask; /* &init_task.cpus_mask */
return p->user_cpus_ptr;
}
#endif /* CONFIG_SMP */
#include "stats.h"
@ -1999,13 +2015,13 @@ static inline void sched_core_tick(struct rq *rq)
__sched_core_tick(rq);
}
#else
#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
static inline void sched_core_account_forceidle(struct rq *rq) { }
static inline void sched_core_tick(struct rq *rq) { }
#endif /* CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS */
#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
#ifdef CONFIG_CGROUP_SCHED
@ -2047,15 +2063,16 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif
}
#else /* CONFIG_CGROUP_SCHED */
#else /* !CONFIG_CGROUP_SCHED: */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
static inline struct task_group *task_group(struct task_struct *p)
{
return NULL;
}
#endif /* CONFIG_CGROUP_SCHED */
#endif /* !CONFIG_CGROUP_SCHED */
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
@ -2100,6 +2117,7 @@ enum {
extern const_debug unsigned int sysctl_sched_features;
#ifdef CONFIG_JUMP_LABEL
#define SCHED_FEAT(name, enabled) \
static __always_inline bool static_branch_##name(struct static_key *key) \
{ \
@ -2112,13 +2130,13 @@ static __always_inline bool static_branch_##name(struct static_key *key) \
extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
#else /* !CONFIG_JUMP_LABEL */
#else /* !CONFIG_JUMP_LABEL: */
#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
#endif /* CONFIG_JUMP_LABEL */
#endif /* !CONFIG_JUMP_LABEL */
#else /* !SCHED_DEBUG */
#else /* !SCHED_DEBUG: */
/*
* Each translation unit has its own copy of sysctl_sched_features to allow
@ -2134,7 +2152,7 @@ static const_debug __maybe_unused unsigned int sysctl_sched_features =
#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
#endif /* SCHED_DEBUG */
#endif /* !SCHED_DEBUG */
extern struct static_key_false sched_numa_balancing;
extern struct static_key_false sched_schedstats;
@ -2403,8 +2421,19 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu);
extern void sched_balance_trigger(struct rq *rq);
extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx);
extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
static inline cpumask_t *alloc_user_cpus_ptr(int node)
{
/*
* See do_set_cpus_allowed() above for the rcu_head usage.
*/
int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
return kmalloc_node(size, GFP_KERNEL, node);
}
static inline struct task_struct *get_push_task(struct rq *rq)
{
struct task_struct *p = rq->curr;
@ -2426,9 +2455,23 @@ static inline struct task_struct *get_push_task(struct rq *rq)
extern int push_cpu_stop(void *arg);
#endif
#else /* !CONFIG_SMP: */
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
struct affinity_context *ctx)
{
return set_cpus_allowed_ptr(p, ctx->new_mask);
}
static inline cpumask_t *alloc_user_cpus_ptr(int node)
{
return NULL;
}
#endif /* !CONFIG_SMP */
#ifdef CONFIG_CPU_IDLE
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
{
@ -2441,7 +2484,9 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
return rq->idle_state;
}
#else
#else /* !CONFIG_CPU_IDLE: */
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
{
@ -2451,7 +2496,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
return NULL;
}
#endif
#endif /* !CONFIG_CPU_IDLE */
extern void schedule_idle(void);
asmlinkage void schedule_user(void);
@ -2464,7 +2510,7 @@ extern void init_sched_dl_class(void);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
extern void reweight_task(struct task_struct *p, int prio);
extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
@ -2480,7 +2526,8 @@ extern void init_dl_entity(struct sched_dl_entity *dl_se);
#define RATIO_SHIFT 8
#define MAX_BW_BITS (64 - BW_SHIFT)
#define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
unsigned long to_ratio(u64 period, u64 runtime);
extern unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
extern void post_init_entity_util_avg(struct task_struct *p);
@ -2506,10 +2553,10 @@ static inline void sched_update_tick_dependency(struct rq *rq)
else
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else
#else /* !CONFIG_NO_HZ_FULL: */
static inline int sched_tick_offload_init(void) { return 0; }
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif
#endif /* !CONFIG_NO_HZ_FULL */
static inline void add_nr_running(struct rq *rq, unsigned count)
{
@ -2596,9 +2643,9 @@ static inline int hrtick_enabled_dl(struct rq *rq)
return hrtick_enabled(rq);
}
void hrtick_start(struct rq *rq, u64 delay);
extern void hrtick_start(struct rq *rq, u64 delay);
#else
#else /* !CONFIG_SCHED_HRTICK: */
static inline int hrtick_enabled_fair(struct rq *rq)
{
@ -2615,13 +2662,10 @@ static inline int hrtick_enabled(struct rq *rq)
return 0;
}
#endif /* CONFIG_SCHED_HRTICK */
#endif /* !CONFIG_SCHED_HRTICK */
#ifndef arch_scale_freq_tick
static __always_inline
void arch_scale_freq_tick(void)
{
}
static __always_inline void arch_scale_freq_tick(void) { }
#endif
#ifndef arch_scale_freq_capacity
@ -2718,7 +2762,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
return 1;
}
#else
#else /* !CONFIG_PREEMPTION: */
/*
* Unfair double_lock_balance: Optimizes throughput at the expense of
* latency by eliminating extra atomic operations when the locks are
@ -2749,7 +2793,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
return 1;
}
#endif /* CONFIG_PREEMPTION */
#endif /* !CONFIG_PREEMPTION */
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
@ -2825,9 +2869,10 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
extern void set_rq_online (struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern bool sched_smp_initialized;
#else /* CONFIG_SMP */
#else /* !CONFIG_SMP: */
/*
* double_rq_lock - safely lock two runqueues
@ -2861,7 +2906,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__release(rq2->lock);
}
#endif
#endif /* !CONFIG_SMP */
DEFINE_LOCK_GUARD_2(double_rq_lock, struct rq,
double_rq_lock(_T->lock, _T->lock2),
@ -2883,15 +2928,14 @@ extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
# ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf);
# endif /* CONFIG_NUMA_BALANCING */
#else
#else /* !CONFIG_SCHED_DEBUG: */
static inline void resched_latency_warn(int cpu, u64 latency) { }
#endif /* CONFIG_SCHED_DEBUG */
#endif /* !CONFIG_SCHED_DEBUG */
extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
@ -2901,6 +2945,7 @@ extern void cfs_bandwidth_usage_inc(void);
extern void cfs_bandwidth_usage_dec(void);
#ifdef CONFIG_NO_HZ_COMMON
#define NOHZ_BALANCE_KICK_BIT 0
#define NOHZ_STATS_KICK_BIT 1
#define NOHZ_NEWILB_KICK_BIT 2
@ -2920,9 +2965,9 @@ extern void cfs_bandwidth_usage_dec(void);
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
extern void nohz_balance_exit_idle(struct rq *rq);
#else
#else /* !CONFIG_NO_HZ_COMMON: */
static inline void nohz_balance_exit_idle(struct rq *rq) { }
#endif
#endif /* !CONFIG_NO_HZ_COMMON */
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_run_idle_balance(int cpu);
@ -2931,6 +2976,7 @@ static inline void nohz_run_idle_balance(int cpu) { }
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;
u64 tick_delta;
@ -2958,9 +3004,11 @@ static inline u64 irq_time_read(int cpu)
return total;
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
/**
@ -2994,9 +3042,9 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
if (data)
data->func(data, rq_clock(rq), flags);
}
#else
#else /* !CONFIG_CPU_FREQ: */
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { }
#endif /* CONFIG_CPU_FREQ */
#endif /* !CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
@ -3007,6 +3055,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif
#ifdef CONFIG_SMP
unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
unsigned long *min,
unsigned long *max);
@ -3049,9 +3098,11 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
{
return READ_ONCE(rq->avg_rt.util_avg);
}
#endif
#endif /* CONFIG_SMP */
#ifdef CONFIG_UCLAMP_TASK
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
static inline unsigned long uclamp_rq_get(struct rq *rq,
@ -3098,9 +3149,40 @@ static inline bool uclamp_is_used(void)
{
return static_branch_likely(&sched_uclamp_used);
}
#else /* CONFIG_UCLAMP_TASK */
static inline unsigned long uclamp_eff_value(struct task_struct *p,
enum uclamp_id clamp_id)
#define for_each_clamp_id(clamp_id) \
for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
{
if (clamp_id == UCLAMP_MIN)
return 0;
return SCHED_CAPACITY_SCALE;
}
/* Integer rounded range for each bucket */
#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
{
return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
}
static inline void
uclamp_se_set(struct uclamp_se *uc_se, unsigned int value, bool user_defined)
{
uc_se->value = value;
uc_se->bucket_id = uclamp_bucket_id(value);
uc_se->user_defined = user_defined;
}
#else /* !CONFIG_UCLAMP_TASK: */
static inline unsigned long
uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
{
if (clamp_id == UCLAMP_MIN)
return 0;
@ -3115,8 +3197,8 @@ static inline bool uclamp_is_used(void)
return false;
}
static inline unsigned long uclamp_rq_get(struct rq *rq,
enum uclamp_id clamp_id)
static inline unsigned long
uclamp_rq_get(struct rq *rq, enum uclamp_id clamp_id)
{
if (clamp_id == UCLAMP_MIN)
return 0;
@ -3124,8 +3206,8 @@ static inline unsigned long uclamp_rq_get(struct rq *rq,
return SCHED_CAPACITY_SCALE;
}
static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
unsigned int value)
static inline void
uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, unsigned int value)
{
}
@ -3133,9 +3215,11 @@ static inline bool uclamp_rq_is_idle(struct rq *rq)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK */
#endif /* !CONFIG_UCLAMP_TASK */
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
{
return READ_ONCE(rq->avg_irq.util_avg);
@ -3150,7 +3234,9 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
return util;
}
#else
#else /* !CONFIG_HAVE_SCHED_AVG_IRQ: */
static inline unsigned long cpu_util_irq(struct rq *rq)
{
return 0;
@ -3161,7 +3247,8 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
{
return util;
}
#endif
#endif /* !CONFIG_HAVE_SCHED_AVG_IRQ */
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
@ -3179,11 +3266,13 @@ extern struct cpufreq_governor schedutil_gov;
#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
#define perf_domain_span(pd) NULL
static inline bool sched_energy_enabled(void) { return false; }
#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
#ifdef CONFIG_MEMBARRIER
/*
* The scheduler provides memory barriers required by membarrier between:
* - prior user-space memory accesses and store to rq->membarrier_state,
@ -3205,13 +3294,16 @@ static inline void membarrier_switch_mm(struct rq *rq,
WRITE_ONCE(rq->membarrier_state, membarrier_state);
}
#else
#else /* !CONFIG_MEMBARRIER :*/
static inline void membarrier_switch_mm(struct rq *rq,
struct mm_struct *prev_mm,
struct mm_struct *next_mm)
{
}
#endif
#endif /* !CONFIG_MEMBARRIER */
#ifdef CONFIG_SMP
static inline bool is_per_cpu_kthread(struct task_struct *p)
@ -3263,7 +3355,7 @@ static inline void __mm_cid_put(struct mm_struct *mm, int cid)
* be held to transition to other states.
*
* State transitions synchronized with cmpxchg or try_cmpxchg need to be
* consistent across cpus, which prevents use of this_cpu_cmpxchg.
* consistent across CPUs, which prevents use of this_cpu_cmpxchg.
*/
static inline void mm_cid_put_lazy(struct task_struct *t)
{
@ -3330,6 +3422,7 @@ static inline int __mm_cid_try_get(struct mm_struct *mm)
}
if (cpumask_test_and_set_cpu(cid, cpumask))
return -1;
return cid;
}
@ -3394,6 +3487,7 @@ unlock:
raw_spin_unlock(&cid_lock);
end:
mm_cid_snapshot_time(rq, mm);
return cid;
}
@ -3416,6 +3510,7 @@ static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
}
cid = __mm_cid_get(rq, mm);
__this_cpu_write(pcpu_cid->cid, cid);
return cid;
}
@ -3470,15 +3565,68 @@ static inline void switch_mm_cid(struct rq *rq,
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
}
#else
#else /* !CONFIG_SCHED_MM_CID: */
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
static inline void init_sched_mm_cid(struct task_struct *t) { }
#endif
#endif /* !CONFIG_SCHED_MM_CID */
extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
#ifdef CONFIG_RT_MUTEXES
static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
{
if (pi_task)
prio = min(prio, pi_task->prio);
return prio;
}
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
return __rt_effective_prio(pi_task, prio);
}
#else /* !CONFIG_RT_MUTEXES: */
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
return prio;
}
#endif /* !CONFIG_RT_MUTEXES */
extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
extern void __setscheduler_prio(struct task_struct *p, int prio);
extern void set_load_weight(struct task_struct *p, bool update_load);
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern void dequeue_task(struct rq *rq, struct task_struct *p, int flags);
extern void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
int oldprio);
#ifdef CONFIG_SMP
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
#else
static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
{
return NULL;
}
static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
{
}
#endif
#endif /* _KERNEL_SCHED_SCHED_H */

@ -224,7 +224,7 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
/*
* Called when a task finally hits the CPU. We can now calculate how
* long it was waiting to run. We also note when it began so that we
* can keep stats on how long its timeslice is.
* can keep stats on how long its time-slice is.
*/
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
{

kernel/sched/syscalls.c (new file, 1699 lines)

File diff suppressed because it is too large.

@ -501,7 +501,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
cpumask_clear_cpu(rq->cpu, old_rd->span);
/*
* If we dont want to free the old_rd yet then
* If we don't want to free the old_rd yet then
* set old_rd to NULL to skip the freeing later
* in this function:
*/
@ -1176,7 +1176,7 @@ fail:
* uniquely identify each group (for a given domain):
*
* - The first is the balance_cpu (see should_we_balance() and the
* load-balance blub in fair.c); for each group we only want 1 CPU to
* load-balance blurb in fair.c); for each group we only want 1 CPU to
* continue balancing at a higher domain.
*
* - The second is the sched_group_capacity; we want all identical groups
@ -1388,7 +1388,7 @@ static inline void asym_cpu_capacity_update_data(int cpu)
/*
* Search if capacity already exits. If not, track which the entry
* where we should insert to keep the list ordered descendingly.
* where we should insert to keep the list ordered descending.
*/
list_for_each_entry(entry, &asym_cap_list, link) {
if (capacity == entry->capacity)
@ -1853,7 +1853,7 @@ void sched_init_numa(int offline_node)
struct cpumask ***masks;
/*
* O(nr_nodes^2) deduplicating selection sort -- in order to find the
* O(nr_nodes^2) de-duplicating selection sort -- in order to find the
* unique distances in the node_distance() table.
*/
distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
@ -2750,7 +2750,7 @@ match2:
}
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
/* Build perf. domains: */
/* Build perf domains: */
for (i = 0; i < ndoms_new; i++) {
for (j = 0; j < n && !sched_energy_update; j++) {
if (cpumask_equal(doms_new[i], doms_cur[j]) &&
@ -2759,7 +2759,7 @@ match2:
goto match3;
}
}
/* No match - add perf. domains for a new rd */
/* No match - add perf domains for a new rd */
has_eas |= build_perf_domains(doms_new[i]);
match3:
;

@ -33,7 +33,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
EXPORT_SYMBOL(wake_bit_function);
/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking)
* To allow interruptible waiting and asynchronous (i.e. non-blocking)
* waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
* permitted return codes. Nonzero return codes halt waiting and return.
*/
@ -133,7 +133,7 @@ EXPORT_SYMBOL(__wake_up_bit);
* @bit: the bit of the word being waited on
*
* There is a standard hashed waitqueue table for generic use. This
* is the part of the hashtable's accessor API that wakes up waiters
* is the part of the hash-table's accessor API that wakes up waiters
* on a bit. For instance, if one were to have waiters on a bitflag,
* one would call wake_up_bit() after clearing the bit.
*