Merge branches 'fixes.2024.04.15a', 'misc.2024.04.12a', 'rcu-sync-normal-improve.2024.04.15a', 'rcu-tasks.2024.04.15a' and 'rcutorture.2024.04.15a' into rcu-merge.2024.04.15a
fixes.2024.04.15a: RCU fixes
misc.2024.04.12a: Miscellaneous fixes
rcu-sync-normal-improve.2024.04.15a: Improving synchronize_rcu() call
rcu-tasks.2024.04.15a: Tasks RCU updates
rcutorture.2024.04.15a: Torture-test updates
This commit is contained in:
commit
64619b283b
3
.mailmap
3
.mailmap
@ -445,7 +445,8 @@ Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
|
||||
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
|
||||
Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
|
||||
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
|
||||
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
|
||||
Neeraj Upadhyay <neeraj.upadhyay@kernel.org> <quic_neeraju@quicinc.com>
|
||||
Neeraj Upadhyay <neeraj.upadhyay@kernel.org> <neeraju@codeaurora.org>
|
||||
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
|
||||
Nguyen Anh Quynh <aquynh@gmail.com>
|
||||
Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
|
||||
|
@ -427,7 +427,7 @@ their assorted primitives.
|
||||
|
||||
This section shows a simple use of the core RCU API to protect a
|
||||
global pointer to a dynamically allocated structure. More-typical
|
||||
uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
|
||||
uses of RCU may be found in listRCU.rst and NMI-RCU.rst.
|
||||
::
|
||||
|
||||
struct foo {
|
||||
@ -510,8 +510,8 @@ So, to sum up:
|
||||
data item.
|
||||
|
||||
See checklist.rst for additional rules to follow when using RCU.
|
||||
And again, more-typical uses of RCU may be found in listRCU.rst,
|
||||
arrayRCU.rst, and NMI-RCU.rst.
|
||||
And again, more-typical uses of RCU may be found in listRCU.rst
|
||||
and NMI-RCU.rst.
|
||||
|
||||
.. _4_whatisRCU:
|
||||
|
||||
|
@ -5091,6 +5091,20 @@
|
||||
delay, memory pressure or callback list growing too
|
||||
big.
|
||||
|
||||
rcutree.rcu_normal_wake_from_gp= [KNL]
|
||||
Reduces the latency of synchronize_rcu() calls. This approach
|
||||
keeps its own track of synchronize_rcu() callers, so it
|
||||
does not interact with regular callbacks because it does not
|
||||
use the call_rcu[_hurry]() path. Please note that this applies
|
||||
to normal grace periods.
|
||||
|
||||
How to enable it:
|
||||
|
||||
echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
|
||||
or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
|
||||
|
||||
Default is 0.
|
||||
|
||||
rcuscale.gp_async= [KNL]
|
||||
Measure performance of asynchronous
|
||||
grace-period primitives such as call_rcu().
|
||||
|
@ -18591,7 +18591,7 @@ F: tools/testing/selftests/resctrl/
|
||||
READ-COPY UPDATE (RCU)
|
||||
M: "Paul E. McKenney" <paulmck@kernel.org>
|
||||
M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
|
||||
M: Neeraj Upadhyay <quic_neeraju@quicinc.com> (kernel/rcu/tasks.h)
|
||||
M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h)
|
||||
M: Joel Fernandes <joel@joelfernandes.org>
|
||||
M: Josh Triplett <josh@joshtriplett.org>
|
||||
M: Boqun Feng <boqun.feng@gmail.com>
|
||||
|
@ -55,7 +55,7 @@ config KPROBES
|
||||
depends on MODULES
|
||||
depends on HAVE_KPROBES
|
||||
select KALLSYMS
|
||||
select TASKS_RCU if PREEMPTION
|
||||
select NEED_TASKS_RCU
|
||||
help
|
||||
Kprobes allows you to trap at almost any kernel address and
|
||||
execute a callback function. register_kprobe() establishes
|
||||
@ -104,7 +104,7 @@ config STATIC_CALL_SELFTEST
|
||||
config OPTPROBES
|
||||
def_bool y
|
||||
depends on KPROBES && HAVE_OPTPROBES
|
||||
select TASKS_RCU if PREEMPTION
|
||||
select NEED_TASKS_RCU
|
||||
|
||||
config KPROBES_ON_FTRACE
|
||||
def_bool y
|
||||
|
@ -19,18 +19,18 @@ struct rcu_synchronize {
|
||||
};
|
||||
void wakeme_after_rcu(struct rcu_head *head);
|
||||
|
||||
void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
|
||||
void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array,
|
||||
struct rcu_synchronize *rs_array);
|
||||
|
||||
#define _wait_rcu_gp(checktiny, ...) \
|
||||
do { \
|
||||
call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \
|
||||
struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \
|
||||
__wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \
|
||||
__crcu_array, __rs_array); \
|
||||
#define _wait_rcu_gp(checktiny, state, ...) \
|
||||
do { \
|
||||
call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \
|
||||
struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \
|
||||
__wait_rcu_gp(checktiny, state, ARRAY_SIZE(__crcu_array), __crcu_array, __rs_array); \
|
||||
} while (0)
|
||||
|
||||
#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)
|
||||
#define wait_rcu_gp(...) _wait_rcu_gp(false, TASK_UNINTERRUPTIBLE, __VA_ARGS__)
|
||||
#define wait_rcu_gp_state(state, ...) _wait_rcu_gp(false, state, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* synchronize_rcu_mult - Wait concurrently for multiple grace periods
|
||||
@ -54,7 +54,7 @@ do { \
|
||||
* grace period.
|
||||
*/
|
||||
#define synchronize_rcu_mult(...) \
|
||||
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
|
||||
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), TASK_UNINTERRUPTIBLE, __VA_ARGS__)
|
||||
|
||||
static inline void cond_resched_rcu(void)
|
||||
{
|
||||
|
@ -707,6 +707,33 @@ TRACE_EVENT_RCU(rcu_invoke_kfree_bulk_callback,
|
||||
__entry->rcuname, __entry->p, __entry->nr_records)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for normal synchronize_rcu() states. The first argument
|
||||
* is the RCU flavor, the second argument is a pointer to the rcu_head,
|
||||
* and the last one is an event.
|
||||
*/
|
||||
TRACE_EVENT_RCU(rcu_sr_normal,
|
||||
|
||||
TP_PROTO(const char *rcuname, struct rcu_head *rhp, const char *srevent),
|
||||
|
||||
TP_ARGS(rcuname, rhp, srevent),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(const char *, rcuname)
|
||||
__field(void *, rhp)
|
||||
__field(const char *, srevent)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rcuname = rcuname;
|
||||
__entry->rhp = rhp;
|
||||
__entry->srevent = srevent;
|
||||
),
|
||||
|
||||
TP_printk("%s rhp=0x%p event=%s",
|
||||
__entry->rcuname, __entry->rhp, __entry->srevent)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for exiting rcu_do_batch after RCU callbacks have been
|
||||
* invoked. The first argument is the name of the RCU flavor,
|
||||
|
@ -28,7 +28,7 @@ config BPF_SYSCALL
|
||||
bool "Enable bpf() system call"
|
||||
select BPF
|
||||
select IRQ_WORK
|
||||
select TASKS_RCU if PREEMPTION
|
||||
select NEED_TASKS_RCU
|
||||
select TASKS_TRACE_RCU
|
||||
select BINARY_PRINTF
|
||||
select NET_SOCK_MSG if NET
|
||||
|
@ -333,7 +333,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
|
||||
int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
|
||||
NULL, im->ip_epilogue);
|
||||
WARN_ON(err);
|
||||
if (IS_ENABLED(CONFIG_PREEMPTION))
|
||||
if (IS_ENABLED(CONFIG_TASKS_RCU))
|
||||
call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
|
||||
else
|
||||
percpu_ref_kill(&im->pcref);
|
||||
|
@ -522,12 +522,18 @@ static inline void show_rcu_tasks_gp_kthreads(void) {}
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
struct task_struct *get_rcu_tasks_gp_kthread(void);
|
||||
void rcu_tasks_get_gp_data(int *flags, unsigned long *gp_seq);
|
||||
#endif // # ifdef CONFIG_TASKS_RCU
|
||||
|
||||
#ifdef CONFIG_TASKS_RUDE_RCU
|
||||
struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
|
||||
void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq);
|
||||
#endif // # ifdef CONFIG_TASKS_RUDE_RCU
|
||||
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU_GENERIC
|
||||
void tasks_cblist_init_generic(void);
|
||||
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
@ -557,8 +563,7 @@ static inline void rcu_set_jiffies_lazy_flush(unsigned long j) { }
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_TREE_RCU)
|
||||
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
|
||||
unsigned long *gp_seq);
|
||||
void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq);
|
||||
void do_trace_rcu_torture_read(const char *rcutorturename,
|
||||
struct rcu_head *rhp,
|
||||
unsigned long secs,
|
||||
@ -566,8 +571,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
|
||||
unsigned long c);
|
||||
void rcu_gp_set_torture_wait(int duration);
|
||||
#else
|
||||
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
int *flags, unsigned long *gp_seq)
|
||||
static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
|
||||
{
|
||||
*flags = 0;
|
||||
*gp_seq = 0;
|
||||
@ -587,20 +591,16 @@ static inline void rcu_gp_set_torture_wait(int duration) { }
|
||||
|
||||
#ifdef CONFIG_TINY_SRCU
|
||||
|
||||
static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
struct srcu_struct *sp, int *flags,
|
||||
static inline void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
|
||||
unsigned long *gp_seq)
|
||||
{
|
||||
if (test_type != SRCU_FLAVOR)
|
||||
return;
|
||||
*flags = 0;
|
||||
*gp_seq = sp->srcu_idx;
|
||||
}
|
||||
|
||||
#elif defined(CONFIG_TREE_SRCU)
|
||||
|
||||
void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
struct srcu_struct *sp, int *flags,
|
||||
void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
|
||||
unsigned long *gp_seq);
|
||||
|
||||
#endif
|
||||
|
@ -381,6 +381,9 @@ struct rcu_torture_ops {
|
||||
void (*gp_kthread_dbg)(void);
|
||||
bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
|
||||
int (*stall_dur)(void);
|
||||
void (*get_gp_data)(int *flags, unsigned long *gp_seq);
|
||||
void (*gp_slow_register)(atomic_t *rgssp);
|
||||
void (*gp_slow_unregister)(atomic_t *rgssp);
|
||||
long cbflood_max;
|
||||
int irq_capable;
|
||||
int can_boost;
|
||||
@ -461,12 +464,13 @@ rcu_torture_pipe_update_one(struct rcu_torture *rp)
|
||||
WRITE_ONCE(rp->rtort_chkp, NULL);
|
||||
smp_store_release(&rtrcp->rtc_ready, 1); // Pair with smp_load_acquire().
|
||||
}
|
||||
i = READ_ONCE(rp->rtort_pipe_count);
|
||||
i = rp->rtort_pipe_count;
|
||||
if (i > RCU_TORTURE_PIPE_LEN)
|
||||
i = RCU_TORTURE_PIPE_LEN;
|
||||
atomic_inc(&rcu_torture_wcount[i]);
|
||||
WRITE_ONCE(rp->rtort_pipe_count, i + 1);
|
||||
if (rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
|
||||
ASSERT_EXCLUSIVE_WRITER(rp->rtort_pipe_count);
|
||||
if (i + 1 >= RCU_TORTURE_PIPE_LEN) {
|
||||
rp->rtort_mbtest = 0;
|
||||
return true;
|
||||
}
|
||||
@ -564,10 +568,12 @@ static struct rcu_torture_ops rcu_ops = {
|
||||
.call = call_rcu_hurry,
|
||||
.cb_barrier = rcu_barrier,
|
||||
.fqs = rcu_force_quiescent_state,
|
||||
.stats = NULL,
|
||||
.gp_kthread_dbg = show_rcu_gp_kthreads,
|
||||
.check_boost_failed = rcu_check_boost_fail,
|
||||
.stall_dur = rcu_jiffies_till_stall_check,
|
||||
.get_gp_data = rcutorture_get_gp_data,
|
||||
.gp_slow_register = rcu_gp_slow_register,
|
||||
.gp_slow_unregister = rcu_gp_slow_unregister,
|
||||
.irq_capable = 1,
|
||||
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
|
||||
.extendables = RCUTORTURE_MAX_EXTEND,
|
||||
@ -611,9 +617,6 @@ static struct rcu_torture_ops rcu_busted_ops = {
|
||||
.sync = synchronize_rcu_busted,
|
||||
.exp_sync = synchronize_rcu_busted,
|
||||
.call = call_rcu_busted,
|
||||
.cb_barrier = NULL,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.name = "busted"
|
||||
};
|
||||
@ -627,6 +630,11 @@ static struct srcu_struct srcu_ctld;
|
||||
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
|
||||
static struct rcu_torture_ops srcud_ops;
|
||||
|
||||
static void srcu_get_gp_data(int *flags, unsigned long *gp_seq)
|
||||
{
|
||||
srcutorture_get_gp_data(srcu_ctlp, flags, gp_seq);
|
||||
}
|
||||
|
||||
static int srcu_torture_read_lock(void)
|
||||
{
|
||||
if (cur_ops == &srcud_ops)
|
||||
@ -735,6 +743,7 @@ static struct rcu_torture_ops srcu_ops = {
|
||||
.call = srcu_torture_call,
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.get_gp_data = srcu_get_gp_data,
|
||||
.cbflood_max = 50000,
|
||||
.irq_capable = 1,
|
||||
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
|
||||
@ -773,6 +782,7 @@ static struct rcu_torture_ops srcud_ops = {
|
||||
.call = srcu_torture_call,
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.get_gp_data = srcu_get_gp_data,
|
||||
.cbflood_max = 50000,
|
||||
.irq_capable = 1,
|
||||
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
|
||||
@ -837,8 +847,6 @@ static struct rcu_torture_ops trivial_ops = {
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.sync = synchronize_rcu_trivial,
|
||||
.exp_sync = synchronize_rcu_trivial,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.name = "trivial"
|
||||
};
|
||||
@ -881,8 +889,7 @@ static struct rcu_torture_ops tasks_ops = {
|
||||
.call = call_rcu_tasks,
|
||||
.cb_barrier = rcu_barrier_tasks,
|
||||
.gp_kthread_dbg = show_rcu_tasks_classic_gp_kthread,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.get_gp_data = rcu_tasks_get_gp_data,
|
||||
.irq_capable = 1,
|
||||
.slow_gps = 1,
|
||||
.name = "tasks"
|
||||
@ -921,9 +928,8 @@ static struct rcu_torture_ops tasks_rude_ops = {
|
||||
.call = call_rcu_tasks_rude,
|
||||
.cb_barrier = rcu_barrier_tasks_rude,
|
||||
.gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
|
||||
.get_gp_data = rcu_tasks_rude_get_gp_data,
|
||||
.cbflood_max = 50000,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.name = "tasks-rude"
|
||||
};
|
||||
@ -973,9 +979,8 @@ static struct rcu_torture_ops tasks_tracing_ops = {
|
||||
.call = call_rcu_tasks_trace,
|
||||
.cb_barrier = rcu_barrier_tasks_trace,
|
||||
.gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
|
||||
.get_gp_data = rcu_tasks_trace_get_gp_data,
|
||||
.cbflood_max = 50000,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.slow_gps = 1,
|
||||
.name = "tasks-tracing"
|
||||
@ -1399,6 +1404,7 @@ rcu_torture_writer(void *arg)
|
||||
if (rp == NULL)
|
||||
continue;
|
||||
rp->rtort_pipe_count = 0;
|
||||
ASSERT_EXCLUSIVE_WRITER(rp->rtort_pipe_count);
|
||||
rcu_torture_writer_state = RTWS_DELAY;
|
||||
udelay(torture_random(&rand) & 0x3ff);
|
||||
rcu_torture_writer_state = RTWS_REPLACE;
|
||||
@ -1414,6 +1420,7 @@ rcu_torture_writer(void *arg)
|
||||
atomic_inc(&rcu_torture_wcount[i]);
|
||||
WRITE_ONCE(old_rp->rtort_pipe_count,
|
||||
old_rp->rtort_pipe_count + 1);
|
||||
ASSERT_EXCLUSIVE_WRITER(old_rp->rtort_pipe_count);
|
||||
|
||||
// Make sure readers block polled grace periods.
|
||||
if (cur_ops->get_gp_state && cur_ops->poll_gp_state) {
|
||||
@ -1586,7 +1593,8 @@ rcu_torture_writer(void *arg)
|
||||
if (list_empty(&rcu_tortures[i].rtort_free) &&
|
||||
rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) {
|
||||
tracing_off();
|
||||
show_rcu_gp_kthreads();
|
||||
if (cur_ops->gp_kthread_dbg)
|
||||
cur_ops->gp_kthread_dbg();
|
||||
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
|
||||
rcu_ftrace_dump(DUMP_ALL);
|
||||
}
|
||||
@ -1997,7 +2005,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
|
||||
preempt_disable();
|
||||
pipe_count = READ_ONCE(p->rtort_pipe_count);
|
||||
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
|
||||
/* Should not happen, but... */
|
||||
// Should not happen in a correct RCU implementation,
|
||||
// happens quite often for torture_type=busted.
|
||||
pipe_count = RCU_TORTURE_PIPE_LEN;
|
||||
}
|
||||
completed = cur_ops->get_gp_seq();
|
||||
@ -2259,10 +2268,8 @@ rcu_torture_stats_print(void)
|
||||
int __maybe_unused flags = 0;
|
||||
unsigned long __maybe_unused gp_seq = 0;
|
||||
|
||||
rcutorture_get_gp_data(cur_ops->ttype,
|
||||
&flags, &gp_seq);
|
||||
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
|
||||
&flags, &gp_seq);
|
||||
if (cur_ops->get_gp_data)
|
||||
cur_ops->get_gp_data(&flags, &gp_seq);
|
||||
wtp = READ_ONCE(writer_task);
|
||||
pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#x cpu %d\n",
|
||||
rcu_torture_writer_state_getname(),
|
||||
@ -2486,8 +2493,8 @@ static int rcu_torture_stall(void *args)
|
||||
preempt_disable();
|
||||
pr_alert("%s start on CPU %d.\n",
|
||||
__func__, raw_smp_processor_id());
|
||||
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
|
||||
stop_at))
|
||||
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(), stop_at) &&
|
||||
!kthread_should_stop())
|
||||
if (stall_cpu_block) {
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
preempt_schedule();
|
||||
@ -2832,13 +2839,14 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
|
||||
|
||||
if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop) &&
|
||||
!shutdown_time_arrived()) {
|
||||
WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED);
|
||||
pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n",
|
||||
if (WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED) && cur_ops->gp_kthread_dbg)
|
||||
cur_ops->gp_kthread_dbg();
|
||||
pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld #online %u\n",
|
||||
__func__,
|
||||
stoppedat - rfp->rcu_fwd_startat, jiffies - stoppedat,
|
||||
n_launders + n_max_cbs - n_launders_cb_snap,
|
||||
n_launders, n_launders_sa,
|
||||
n_max_gps, n_max_cbs, cver, gps);
|
||||
n_max_gps, n_max_cbs, cver, gps, num_online_cpus());
|
||||
atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
|
||||
mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
|
||||
rcu_torture_fwd_cb_hist(rfp);
|
||||
@ -3040,11 +3048,12 @@ static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
|
||||
}
|
||||
|
||||
/* IPI handler to get callback posted on desired CPU, if online. */
|
||||
static void rcu_torture_barrier1cb(void *rcu_void)
|
||||
static int rcu_torture_barrier1cb(void *rcu_void)
|
||||
{
|
||||
struct rcu_head *rhp = rcu_void;
|
||||
|
||||
cur_ops->call(rhp, rcu_torture_barrier_cbf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* kthread function to register callbacks used to test RCU barriers. */
|
||||
@ -3070,11 +3079,9 @@ static int rcu_torture_barrier_cbs(void *arg)
|
||||
* The above smp_load_acquire() ensures barrier_phase load
|
||||
* is ordered before the following ->call().
|
||||
*/
|
||||
if (smp_call_function_single(myid, rcu_torture_barrier1cb,
|
||||
&rcu, 1)) {
|
||||
// IPI failed, so use direct call from current CPU.
|
||||
if (smp_call_on_cpu(myid, rcu_torture_barrier1cb, &rcu, 1))
|
||||
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
|
||||
}
|
||||
|
||||
if (atomic_dec_and_test(&barrier_cbs_count))
|
||||
wake_up(&barrier_wq);
|
||||
} while (!torture_must_stop());
|
||||
@ -3340,12 +3347,12 @@ rcu_torture_cleanup(void)
|
||||
pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
|
||||
cur_ops->cb_barrier();
|
||||
}
|
||||
rcu_gp_slow_unregister(NULL);
|
||||
if (cur_ops->gp_slow_unregister)
|
||||
cur_ops->gp_slow_unregister(NULL);
|
||||
return;
|
||||
}
|
||||
if (!cur_ops) {
|
||||
torture_cleanup_end();
|
||||
rcu_gp_slow_unregister(NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -3384,8 +3391,8 @@ rcu_torture_cleanup(void)
|
||||
fakewriter_tasks = NULL;
|
||||
}
|
||||
|
||||
rcutorture_get_gp_data(cur_ops->ttype, &flags, &gp_seq);
|
||||
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, &flags, &gp_seq);
|
||||
if (cur_ops->get_gp_data)
|
||||
cur_ops->get_gp_data(&flags, &gp_seq);
|
||||
pr_alert("%s: End-test grace-period state: g%ld f%#x total-gps=%ld\n",
|
||||
cur_ops->name, (long)gp_seq, flags,
|
||||
rcutorture_seq_diff(gp_seq, start_gp_seq));
|
||||
@ -3444,7 +3451,8 @@ rcu_torture_cleanup(void)
|
||||
else
|
||||
rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
|
||||
torture_cleanup_end();
|
||||
rcu_gp_slow_unregister(&rcu_fwd_cb_nodelay);
|
||||
if (cur_ops->gp_slow_unregister)
|
||||
cur_ops->gp_slow_unregister(NULL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
|
||||
@ -3756,8 +3764,8 @@ rcu_torture_init(void)
|
||||
nrealreaders = 1;
|
||||
}
|
||||
rcu_torture_print_module_parms(cur_ops, "Start of test");
|
||||
rcutorture_get_gp_data(cur_ops->ttype, &flags, &gp_seq);
|
||||
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, &flags, &gp_seq);
|
||||
if (cur_ops->get_gp_data)
|
||||
cur_ops->get_gp_data(&flags, &gp_seq);
|
||||
start_gp_seq = gp_seq;
|
||||
pr_alert("%s: Start-test grace-period state: g%ld f%#x\n",
|
||||
cur_ops->name, (long)gp_seq, flags);
|
||||
@ -3926,7 +3934,8 @@ rcu_torture_init(void)
|
||||
if (object_debug)
|
||||
rcu_test_debug_objects();
|
||||
torture_init_end();
|
||||
rcu_gp_slow_register(&rcu_fwd_cb_nodelay);
|
||||
if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
|
||||
cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
|
||||
return 0;
|
||||
|
||||
unwind:
|
||||
|
@ -1826,12 +1826,9 @@ static void process_srcu(struct work_struct *work)
|
||||
srcu_reschedule(ssp, curdelay);
|
||||
}
|
||||
|
||||
void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
struct srcu_struct *ssp, int *flags,
|
||||
void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
|
||||
unsigned long *gp_seq)
|
||||
{
|
||||
if (test_type != SRCU_FLAVOR)
|
||||
return;
|
||||
*flags = 0;
|
||||
*gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
|
||||
}
|
||||
|
@ -74,6 +74,7 @@ struct rcu_tasks_percpu {
|
||||
* @holdouts_func: This flavor's holdout-list scan function (optional).
|
||||
* @postgp_func: This flavor's post-grace-period function (optional).
|
||||
* @call_func: This flavor's call_rcu()-equivalent function.
|
||||
* @wait_state: Task state for synchronous grace-period waits (default TASK_UNINTERRUPTIBLE).
|
||||
* @rtpcpu: This flavor's rcu_tasks_percpu structure.
|
||||
* @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
|
||||
* @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
|
||||
@ -107,6 +108,7 @@ struct rcu_tasks {
|
||||
holdouts_func_t holdouts_func;
|
||||
postgp_func_t postgp_func;
|
||||
call_rcu_func_t call_func;
|
||||
unsigned int wait_state;
|
||||
struct rcu_tasks_percpu __percpu *rtpcpu;
|
||||
int percpu_enqueue_shift;
|
||||
int percpu_enqueue_lim;
|
||||
@ -134,6 +136,7 @@ static struct rcu_tasks rt_name = \
|
||||
.tasks_gp_mutex = __MUTEX_INITIALIZER(rt_name.tasks_gp_mutex), \
|
||||
.gp_func = gp, \
|
||||
.call_func = call, \
|
||||
.wait_state = TASK_UNINTERRUPTIBLE, \
|
||||
.rtpcpu = &rt_name ## __percpu, \
|
||||
.lazy_jiffies = DIV_ROUND_UP(HZ, 4), \
|
||||
.name = n, \
|
||||
@ -147,7 +150,7 @@ static struct rcu_tasks rt_name = \
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
|
||||
/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
|
||||
/* Report delay of scan exiting tasklist in rcu_tasks_postscan(). */
|
||||
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
|
||||
static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
|
||||
#endif
|
||||
@ -638,7 +641,7 @@ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
|
||||
|
||||
// If the grace-period kthread is running, use it.
|
||||
if (READ_ONCE(rtp->kthread_ptr)) {
|
||||
wait_rcu_gp(rtp->call_func);
|
||||
wait_rcu_gp_state(rtp->wait_state, rtp->call_func);
|
||||
return;
|
||||
}
|
||||
rcu_tasks_one_gp(rtp, true);
|
||||
@ -1160,6 +1163,7 @@ static int __init rcu_spawn_tasks_kthread(void)
|
||||
rcu_tasks.postscan_func = rcu_tasks_postscan;
|
||||
rcu_tasks.holdouts_func = check_all_holdout_tasks;
|
||||
rcu_tasks.postgp_func = rcu_tasks_postgp;
|
||||
rcu_tasks.wait_state = TASK_IDLE;
|
||||
rcu_spawn_tasks_kthread_generic(&rcu_tasks);
|
||||
return 0;
|
||||
}
|
||||
@ -1178,6 +1182,13 @@ struct task_struct *get_rcu_tasks_gp_kthread(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
|
||||
|
||||
void rcu_tasks_get_gp_data(int *flags, unsigned long *gp_seq)
|
||||
{
|
||||
*flags = 0;
|
||||
*gp_seq = rcu_seq_current(&rcu_tasks.tasks_gp_seq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_tasks_get_gp_data);
|
||||
|
||||
/*
|
||||
* Protect against tasklist scan blind spot while the task is exiting and
|
||||
* may be removed from the tasklist. Do this by adding the task to yet
|
||||
@ -1199,8 +1210,7 @@ void exit_tasks_rcu_start(void)
|
||||
rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
|
||||
t->rcu_tasks_exit_cpu = smp_processor_id();
|
||||
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
|
||||
if (!rtpcp->rtp_exit_list.next)
|
||||
INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
|
||||
WARN_ON_ONCE(!rtpcp->rtp_exit_list.next);
|
||||
list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
|
||||
preempt_enable();
|
||||
@ -1358,6 +1368,13 @@ struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_rcu_tasks_rude_gp_kthread);
|
||||
|
||||
void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq)
|
||||
{
|
||||
*flags = 0;
|
||||
*gp_seq = rcu_seq_current(&rcu_tasks_rude.tasks_gp_seq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_tasks_rude_get_gp_data);
|
||||
|
||||
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -2002,7 +2019,7 @@ void show_rcu_tasks_trace_gp_kthread(void)
|
||||
{
|
||||
char buf[64];
|
||||
|
||||
sprintf(buf, "N%lu h:%lu/%lu/%lu",
|
||||
snprintf(buf, sizeof(buf), "N%lu h:%lu/%lu/%lu",
|
||||
data_race(n_trc_holdouts),
|
||||
data_race(n_heavy_reader_ofl_updates),
|
||||
data_race(n_heavy_reader_updates),
|
||||
@ -2018,6 +2035,13 @@ struct task_struct *get_rcu_tasks_trace_gp_kthread(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread);
|
||||
|
||||
void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq)
|
||||
{
|
||||
*flags = 0;
|
||||
*gp_seq = rcu_seq_current(&rcu_tasks_trace.tasks_gp_seq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_tasks_trace_get_gp_data);
|
||||
|
||||
#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
|
||||
#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
|
@ -75,6 +75,7 @@
|
||||
#define MODULE_PARAM_PREFIX "rcutree."
|
||||
|
||||
/* Data structures. */
|
||||
static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
|
||||
.gpwrap = true,
|
||||
@ -93,6 +94,8 @@ static struct rcu_state rcu_state = {
|
||||
.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
|
||||
.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
|
||||
.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
|
||||
.srs_cleanup_work = __WORK_INITIALIZER(rcu_state.srs_cleanup_work,
|
||||
rcu_sr_normal_gp_cleanup_work),
|
||||
};
|
||||
|
||||
/* Dump rcu_node combining tree at boot to verify correct setup. */
|
||||
@ -536,17 +539,10 @@ static struct rcu_node *rcu_get_root(void)
|
||||
/*
|
||||
* Send along grace-period-related data for rcutorture diagnostics.
|
||||
*/
|
||||
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
|
||||
unsigned long *gp_seq)
|
||||
void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
|
||||
{
|
||||
switch (test_type) {
|
||||
case RCU_FLAVOR:
|
||||
*flags = READ_ONCE(rcu_state.gp_flags);
|
||||
*gp_seq = rcu_seq_current(&rcu_state.gp_seq);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
*flags = READ_ONCE(rcu_state.gp_flags);
|
||||
*gp_seq = rcu_seq_current(&rcu_state.gp_seq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
|
||||
|
||||
@ -1450,6 +1446,305 @@ static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* There is a single llist, which is used for handling
|
||||
* synchronize_rcu() users' enqueued rcu_synchronize nodes.
|
||||
* Within this llist, there are two tail pointers:
|
||||
*
|
||||
* wait tail: Tracks the set of nodes, which need to
|
||||
* wait for the current GP to complete.
|
||||
* done tail: Tracks the set of nodes, for which grace
|
||||
* period has elapsed. These nodes processing
|
||||
* will be done as part of the cleanup work
|
||||
* execution by a kworker.
|
||||
*
|
||||
* At every grace period init, a new wait node is added
|
||||
* to the llist. This wait node is used as wait tail
|
||||
* for this new grace period. Given that there is a fixed
|
||||
* number of wait nodes, all wait nodes may be in use
|
||||
* (which can happen when kworker callback processing
|
||||
* is delayed) when an additional grace period is requested.
|
||||
* This means that the system is slow in processing callbacks.
|
||||
*
|
||||
* TODO: If slow processing is detected, the first node
|
||||
* in the llist should be used as the wait tail for this
|
||||
* grace period, so that users who must wait due
|
||||
* to slow processing are handled by _this_ grace period
|
||||
* and not the next one.
|
||||
*
|
||||
* Below is an illustration of how the done and wait
|
||||
* tail pointers move from one set of rcu_synchronize nodes
|
||||
* to the other, as grace periods start and finish and
|
||||
* nodes are processed by kworker.
|
||||
*
|
||||
*
|
||||
* a. Initial llist callbacks list:
|
||||
*
|
||||
* +----------+ +--------+ +-------+
|
||||
* | | | | | |
|
||||
* | head |---------> | cb2 |--------->| cb1 |
|
||||
* | | | | | |
|
||||
* +----------+ +--------+ +-------+
|
||||
*
|
||||
*
|
||||
*
|
||||
* b. New GP1 Start:
|
||||
*
|
||||
* WAIT TAIL
|
||||
* |
|
||||
* |
|
||||
* v
|
||||
* +----------+ +--------+ +--------+ +-------+
|
||||
* | | | | | | | |
|
||||
* | head ------> wait |------> cb2 |------> | cb1 |
|
||||
* | | | head1 | | | | |
|
||||
* +----------+ +--------+ +--------+ +-------+
|
||||
*
|
||||
*
|
||||
*
|
||||
* c. GP completion:
|
||||
*
|
||||
* WAIT_TAIL == DONE_TAIL
|
||||
*
|
||||
* DONE TAIL
|
||||
* |
|
||||
* |
|
||||
* v
|
||||
* +----------+ +--------+ +--------+ +-------+
|
||||
* | | | | | | | |
|
||||
* | head ------> wait |------> cb2 |------> | cb1 |
|
||||
* | | | head1 | | | | |
|
||||
* +----------+ +--------+ +--------+ +-------+
|
||||
*
|
||||
*
|
||||
*
|
||||
* d. New callbacks and GP2 start:
|
||||
*
|
||||
* WAIT TAIL DONE TAIL
|
||||
* | |
|
||||
* | |
|
||||
* v v
|
||||
* +----------+ +------+ +------+ +------+ +-----+ +-----+ +-----+
|
||||
* | | | | | | | | | | | | | |
|
||||
* | head ------> wait |--->| cb4 |--->| cb3 |--->|wait |--->| cb2 |--->| cb1 |
|
||||
* | | | head2| | | | | |head1| | | | |
|
||||
* +----------+ +------+ +------+ +------+ +-----+ +-----+ +-----+
|
||||
*
|
||||
*
|
||||
*
|
||||
* e. GP2 completion:
|
||||
*
|
||||
* WAIT_TAIL == DONE_TAIL
|
||||
* DONE TAIL
|
||||
* |
|
||||
* |
|
||||
* v
|
||||
* +----------+ +------+ +------+ +------+ +-----+ +-----+ +-----+
|
||||
* | | | | | | | | | | | | | |
|
||||
* | head ------> wait |--->| cb4 |--->| cb3 |--->|wait |--->| cb2 |--->| cb1 |
|
||||
* | | | head2| | | | | |head1| | | | |
|
||||
* +----------+ +------+ +------+ +------+ +-----+ +-----+ +-----+
|
||||
*
|
||||
*
|
||||
* While the llist state transitions from d to e, a kworker
|
||||
* can start executing rcu_sr_normal_gp_cleanup_work() and
|
||||
* can observe either the old done tail (@c) or the new
|
||||
* done tail (@e). So, done tail updates and reads need
|
||||
* to use the rel-acq semantics. If the concurrent kworker
|
||||
* observes the old done tail, the newly queued work
|
||||
* execution will process the updated done tail. If the
|
||||
* concurrent kworker observes the new done tail, then
|
||||
* the newly queued work will skip processing the done
|
||||
* tail, as workqueue semantics guarantees that the new
|
||||
* work is executed only after the previous one completes.
|
||||
*
|
||||
* f. kworker callbacks processing complete:
|
||||
*
|
||||
*
|
||||
* DONE TAIL
|
||||
* |
|
||||
* |
|
||||
* v
|
||||
* +----------+ +--------+
|
||||
* | | | |
|
||||
* | head ------> wait |
|
||||
* | | | head2 |
|
||||
* +----------+ +--------+
|
||||
*
|
||||
*/
|
||||
static bool rcu_sr_is_wait_head(struct llist_node *node)
|
||||
{
|
||||
return &(rcu_state.srs_wait_nodes)[0].node <= node &&
|
||||
node <= &(rcu_state.srs_wait_nodes)[SR_NORMAL_GP_WAIT_HEAD_MAX - 1].node;
|
||||
}
|
||||
|
||||
static struct llist_node *rcu_sr_get_wait_head(void)
|
||||
{
|
||||
struct sr_wait_node *sr_wn;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SR_NORMAL_GP_WAIT_HEAD_MAX; i++) {
|
||||
sr_wn = &(rcu_state.srs_wait_nodes)[i];
|
||||
|
||||
if (!atomic_cmpxchg_acquire(&sr_wn->inuse, 0, 1))
|
||||
return &sr_wn->node;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void rcu_sr_put_wait_head(struct llist_node *node)
|
||||
{
|
||||
struct sr_wait_node *sr_wn = container_of(node, struct sr_wait_node, node);
|
||||
|
||||
atomic_set_release(&sr_wn->inuse, 0);
|
||||
}
|
||||
|
||||
/*
 * Disabled by default. While this is zero, synchronize_rcu_normal()
 * falls back to wait_rcu_gp(call_rcu_hurry) instead of queueing the
 * request on rcu_state.srs_next.
 */
|
||||
static int rcu_normal_wake_from_gp;
|
||||
module_param(rcu_normal_wake_from_gp, int, 0644);
|
||||
/* Workqueue on which rcu_sr_normal_gp_cleanup() queues srs_cleanup_work. */
static struct workqueue_struct *sync_wq;
|
||||
|
||||
static void rcu_sr_normal_complete(struct llist_node *node)
|
||||
{
|
||||
struct rcu_synchronize *rs = container_of(
|
||||
(struct rcu_head *) node, struct rcu_synchronize, head);
|
||||
unsigned long oldstate = (unsigned long) rs->head.func;
|
||||
|
||||
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
|
||||
!poll_state_synchronize_rcu(oldstate),
|
||||
"A full grace period is not passed yet: %lu",
|
||||
rcu_seq_diff(get_state_synchronize_rcu(), oldstate));
|
||||
|
||||
/* Finally. */
|
||||
complete(&rs->completion);
|
||||
}
|
||||
|
||||
static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
|
||||
{
|
||||
struct llist_node *done, *rcu, *next, *head;
|
||||
|
||||
/*
|
||||
* This work execution can potentially execute
|
||||
* while a new done tail is being updated by
|
||||
* grace period kthread in rcu_sr_normal_gp_cleanup().
|
||||
* So, read and updates of done tail need to
|
||||
* follow acq-rel semantics.
|
||||
*
|
||||
* Given that wq semantics guarantees that a single work
|
||||
* cannot execute concurrently by multiple kworkers,
|
||||
* the done tail list manipulations are protected here.
|
||||
*/
|
||||
done = smp_load_acquire(&rcu_state.srs_done_tail);
|
||||
if (!done)
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!rcu_sr_is_wait_head(done));
|
||||
head = done->next;
|
||||
done->next = NULL;
|
||||
|
||||
/*
|
||||
* The dummy node, which is pointed to by the
|
||||
* done tail which is acq-read above is not removed
|
||||
* here. This allows lockless additions of new
|
||||
* rcu_synchronize nodes in rcu_sr_normal_add_req(),
|
||||
* while the cleanup work executes. The dummy
|
||||
* nodes is removed, in next round of cleanup
|
||||
* work execution.
|
||||
*/
|
||||
llist_for_each_safe(rcu, next, head) {
|
||||
if (!rcu_sr_is_wait_head(rcu)) {
|
||||
rcu_sr_normal_complete(rcu);
|
||||
continue;
|
||||
}
|
||||
|
||||
rcu_sr_put_wait_head(rcu);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function for rcu_gp_cleanup().
|
||||
*/
|
||||
static void rcu_sr_normal_gp_cleanup(void)
|
||||
{
|
||||
struct llist_node *wait_tail, *next, *rcu;
|
||||
int done = 0;
|
||||
|
||||
wait_tail = rcu_state.srs_wait_tail;
|
||||
if (wait_tail == NULL)
|
||||
return;
|
||||
|
||||
rcu_state.srs_wait_tail = NULL;
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_wait_tail);
|
||||
WARN_ON_ONCE(!rcu_sr_is_wait_head(wait_tail));
|
||||
|
||||
/*
|
||||
* Process (a) and (d) cases. See an illustration.
|
||||
*/
|
||||
llist_for_each_safe(rcu, next, wait_tail->next) {
|
||||
if (rcu_sr_is_wait_head(rcu))
|
||||
break;
|
||||
|
||||
rcu_sr_normal_complete(rcu);
|
||||
// It can be last, update a next on this step.
|
||||
wait_tail->next = next;
|
||||
|
||||
if (++done == SR_MAX_USERS_WAKE_FROM_GP)
|
||||
break;
|
||||
}
|
||||
|
||||
// concurrent sr_normal_gp_cleanup work might observe this update.
|
||||
smp_store_release(&rcu_state.srs_done_tail, wait_tail);
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
|
||||
|
||||
/*
|
||||
* We schedule a work in order to perform a final processing
|
||||
* of outstanding users(if still left) and releasing wait-heads
|
||||
* added by rcu_sr_normal_gp_init() call.
|
||||
*/
|
||||
queue_work(sync_wq, &rcu_state.srs_cleanup_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function for rcu_gp_init().
|
||||
*/
|
||||
static bool rcu_sr_normal_gp_init(void)
|
||||
{
|
||||
struct llist_node *first;
|
||||
struct llist_node *wait_head;
|
||||
bool start_new_poll = false;
|
||||
|
||||
first = READ_ONCE(rcu_state.srs_next.first);
|
||||
if (!first || rcu_sr_is_wait_head(first))
|
||||
return start_new_poll;
|
||||
|
||||
wait_head = rcu_sr_get_wait_head();
|
||||
if (!wait_head) {
|
||||
// Kick another GP to retry.
|
||||
start_new_poll = true;
|
||||
return start_new_poll;
|
||||
}
|
||||
|
||||
/* Inject a wait-dummy-node. */
|
||||
llist_add(wait_head, &rcu_state.srs_next);
|
||||
|
||||
/*
|
||||
* A waiting list of rcu_synchronize nodes should be empty on
|
||||
* this step, since a GP-kthread, rcu_gp_init() -> gp_cleanup(),
|
||||
* rolls it over. If not, it is a BUG, warn a user.
|
||||
*/
|
||||
WARN_ON_ONCE(rcu_state.srs_wait_tail != NULL);
|
||||
rcu_state.srs_wait_tail = wait_head;
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_wait_tail);
|
||||
|
||||
return start_new_poll;
|
||||
}
|
||||
|
||||
static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
|
||||
{
|
||||
llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a new grace period. Return false if no grace period required.
|
||||
*/
|
||||
@ -1460,6 +1755,7 @@ static noinline_for_stack bool rcu_gp_init(void)
|
||||
unsigned long mask;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp = rcu_get_root();
|
||||
bool start_new_poll;
|
||||
|
||||
WRITE_ONCE(rcu_state.gp_activity, jiffies);
|
||||
raw_spin_lock_irq_rcu_node(rnp);
|
||||
@ -1484,10 +1780,24 @@ static noinline_for_stack bool rcu_gp_init(void)
|
||||
/* Record GP times before starting GP, hence rcu_seq_start(). */
|
||||
rcu_seq_start(&rcu_state.gp_seq);
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
|
||||
start_new_poll = rcu_sr_normal_gp_init();
|
||||
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
|
||||
rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
|
||||
/*
|
||||
* The "start_new_poll" is set to true, only when this GP is not able
|
||||
* to handle anything and there are outstanding users. It happens when
|
||||
* the rcu_sr_normal_gp_init() function was not able to insert a dummy
|
||||
* separator to the llist, because there were no dummy-nodes left.
|
||||
*
|
||||
* The number of dummy-nodes is fixed, so we might have run out of
|
||||
* them; if so, we start a new poll request to retry. It is rare
|
||||
* and it means that the system is processing callbacks slowly.
|
||||
*/
|
||||
if (start_new_poll)
|
||||
(void) start_poll_synchronize_rcu();
|
||||
|
||||
/*
|
||||
* Apply per-leaf buffered online and offline operations to
|
||||
* the rcu_node tree. Note that this new grace period need not
|
||||
@ -1852,6 +2162,9 @@ static noinline void rcu_gp_cleanup(void)
|
||||
}
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
|
||||
// Make synchronize_rcu() users aware of the end of old grace period.
|
||||
rcu_sr_normal_gp_cleanup();
|
||||
|
||||
// If strict, make all CPUs aware of the end of the old grace period.
|
||||
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
|
||||
on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
|
||||
@ -3584,6 +3897,43 @@ static int rcu_blocking_is_gp(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function for the synchronize_rcu() API.
|
||||
*/
|
||||
static void synchronize_rcu_normal(void)
|
||||
{
|
||||
struct rcu_synchronize rs;
|
||||
|
||||
trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
|
||||
|
||||
if (!READ_ONCE(rcu_normal_wake_from_gp)) {
|
||||
wait_rcu_gp(call_rcu_hurry);
|
||||
goto trace_complete_out;
|
||||
}
|
||||
|
||||
init_rcu_head_on_stack(&rs.head);
|
||||
init_completion(&rs.completion);
|
||||
|
||||
/*
|
||||
* This code might be preempted, therefore take a GP
|
||||
* snapshot before adding a request.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PROVE_RCU))
|
||||
rs.head.func = (void *) get_state_synchronize_rcu();
|
||||
|
||||
rcu_sr_normal_add_req(&rs);
|
||||
|
||||
/* Kick a GP and start waiting. */
|
||||
(void) start_poll_synchronize_rcu();
|
||||
|
||||
/* Now we can wait. */
|
||||
wait_for_completion(&rs.completion);
|
||||
destroy_rcu_head_on_stack(&rs.head);
|
||||
|
||||
trace_complete_out:
|
||||
trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("complete"));
|
||||
}
|
||||
|
||||
/**
|
||||
* synchronize_rcu - wait until a grace period has elapsed.
|
||||
*
|
||||
@ -3635,7 +3985,7 @@ void synchronize_rcu(void)
|
||||
if (rcu_gp_is_expedited())
|
||||
synchronize_rcu_expedited();
|
||||
else
|
||||
wait_rcu_gp(call_rcu_hurry);
|
||||
synchronize_rcu_normal();
|
||||
return;
|
||||
}
|
||||
|
||||
@ -5256,6 +5606,9 @@ void __init rcu_init(void)
|
||||
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
|
||||
WARN_ON(!rcu_gp_wq);
|
||||
|
||||
sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0);
|
||||
WARN_ON(!sync_wq);
|
||||
|
||||
/* Fill in default value for rcutree.qovld boot parameter. */
|
||||
/* -After- the rcu_node ->lock fields are initialized! */
|
||||
if (qovld < 0)
|
||||
|
@ -315,6 +315,19 @@ do { \
|
||||
__set_current_state(TASK_RUNNING); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* A max threshold for synchronize_rcu() users which are
|
||||
* awakened directly by the rcu_gp_kthread(). The rest is
|
||||
* deferred to the main worker.
|
||||
*/
|
||||
#define SR_MAX_USERS_WAKE_FROM_GP 5
|
||||
/* Number of dummy wait-head nodes in rcu_state.srs_wait_nodes[]. */
#define SR_NORMAL_GP_WAIT_HEAD_MAX 5
|
||||
|
||||
/*
 * One dummy "wait head" node used to separate batches of
 * synchronize_rcu() requests on the request llist. rcu_state embeds an
 * array of SR_NORMAL_GP_WAIT_HEAD_MAX of these.
 */
struct sr_wait_node {
|
||||
atomic_t inuse; /* 0 when free; set to 1 by cmpxchg when claimed. */
|
||||
struct llist_node node; /* Linkage used to put this node on the llist. */
|
||||
};
|
||||
|
||||
/*
|
||||
* RCU global state, including node hierarchy. This hierarchy is
|
||||
* represented in "heap" form in a dense array. The root (first level)
|
||||
@ -400,6 +413,13 @@ struct rcu_state {
|
||||
/* Synchronize offline with */
|
||||
/* GP pre-initialization. */
|
||||
int nocb_is_setup; /* nocb is setup from boot */
|
||||
|
||||
/* synchronize_rcu() part. */
|
||||
struct llist_head srs_next; /* request a GP users. */
|
||||
struct llist_node *srs_wait_tail; /* wait for GP users. */
|
||||
struct llist_node *srs_done_tail; /* ready for GP users. */
|
||||
struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX];
|
||||
struct work_struct srs_cleanup_work;
|
||||
};
|
||||
|
||||
/* Values for rcu_state structure's gp_flags field. */
|
||||
|
@ -930,7 +930,7 @@ void synchronize_rcu_expedited(void)
|
||||
|
||||
/* If expedited grace periods are prohibited, fall back to normal. */
|
||||
if (rcu_gp_is_normal()) {
|
||||
wait_rcu_gp(call_rcu_hurry);
|
||||
synchronize_rcu_normal();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -408,7 +408,7 @@ void wakeme_after_rcu(struct rcu_head *head)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(wakeme_after_rcu);
|
||||
|
||||
void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
|
||||
void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array,
|
||||
struct rcu_synchronize *rs_array)
|
||||
{
|
||||
int i;
|
||||
@ -440,7 +440,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
|
||||
if (crcu_array[j] == crcu_array[i])
|
||||
break;
|
||||
if (j == i) {
|
||||
wait_for_completion(&rs_array[i].completion);
|
||||
wait_for_completion_state(&rs_array[i].completion, state);
|
||||
destroy_rcu_head_on_stack(&rs_array[i].head);
|
||||
}
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ config TRACING
|
||||
select BINARY_PRINTF
|
||||
select EVENT_TRACING
|
||||
select TRACE_CLOCK
|
||||
select TASKS_RCU if PREEMPTION
|
||||
select NEED_TASKS_RCU
|
||||
|
||||
config GENERIC_TRACER
|
||||
bool
|
||||
@ -204,7 +204,7 @@ config FUNCTION_TRACER
|
||||
select GENERIC_TRACER
|
||||
select CONTEXT_SWITCH_TRACER
|
||||
select GLOB
|
||||
select TASKS_RCU if PREEMPTION
|
||||
select NEED_TASKS_RCU
|
||||
select TASKS_RUDE_RCU
|
||||
help
|
||||
Enable the kernel to trace every kernel function. This is done
|
||||
|
@ -3157,8 +3157,7 @@ out:
|
||||
* synchronize_rcu_tasks() will wait for those tasks to
|
||||
* execute and either schedule voluntarily or enter user space.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PREEMPTION))
|
||||
synchronize_rcu_tasks();
|
||||
synchronize_rcu_tasks();
|
||||
|
||||
ftrace_trampoline_free(ops);
|
||||
}
|
||||
|
@ -391,7 +391,7 @@ __EOF__
|
||||
forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
|
||||
deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
|
||||
echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log
|
||||
tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
|
||||
tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y CONFIG_KPROBES=n CONFIG_RCU_TRACE=n CONFIG_TRACING=n CONFIG_BLK_DEV_IO_TRACE=n CONFIG_UPROBE_EVENTS=n $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
|
||||
retcode=$?
|
||||
if test "$retcode" -ne 0
|
||||
then
|
||||
@ -425,7 +425,7 @@ fi
|
||||
if test "$do_scftorture" = "yes"
|
||||
then
|
||||
# Scale memory based on the number of CPUs.
|
||||
scfmem=$((2+HALF_ALLOTED_CPUS/16))
|
||||
scfmem=$((3+HALF_ALLOTED_CPUS/16))
|
||||
torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
|
||||
torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
|
||||
fi
|
||||
@ -559,7 +559,7 @@ do_kcsan="$do_kcsan_save"
|
||||
if test "$do_kvfree" = "yes"
|
||||
then
|
||||
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
|
||||
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
|
||||
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
|
||||
fi
|
||||
|
||||
if test "$do_clocksourcewd" = "yes"
|
||||
|
@ -10,8 +10,9 @@ CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_RCU_TRACE=n
|
||||
CONFIG_RCU_NOCB_CPU=n
|
||||
CONFIG_DEBUG_LOCK_ALLOC=n
|
||||
CONFIG_RCU_BOOST=n
|
||||
CONFIG_RCU_BOOST=y
|
||||
CONFIG_RCU_BOOST_DELAY=100
|
||||
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
|
||||
#CHECK#CONFIG_RCU_EXPERT=n
|
||||
CONFIG_RCU_EXPERT=y
|
||||
CONFIG_KPROBES=n
|
||||
CONFIG_FTRACE=n
|
||||
|
Loading…
Reference in New Issue
Block a user