Merge tag 'timers-urgent-2024-04-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fixes from Ingo Molnar:
 "Fix various timer bugs:

   - Fix a timer migration bug that may result in missed events

   - Fix timer migration group hierarchy event updates

   - Fix a PowerPC64 build warning

   - Fix a handful of DocBook annotation bugs"

* tag 'timers-urgent-2024-04-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  timers/migration: Return early on deactivation
  timers/migration: Fix ignored event due to missing CPU update
  vdso: Use CONFIG_PAGE_SHIFT in vdso/datapage.h
  timers: Fix text inconsistencies and spelling
  tick/sched: Fix struct tick_sched doc warnings
  tick/sched: Fix various kernel-doc warnings
  timers: Fix kernel-doc format and add Return values
  time/timekeeping: Fix kernel-doc warnings and typos
  time/timecounter: Fix inline documentation
Linus Torvalds 2024-04-07 09:20:50 -07:00
commit 3520c35e5f
9 changed files with 121 additions and 36 deletions

arch/powerpc/include/asm/vdso/gettimeofday.h

@@ -4,7 +4,6 @@
#ifndef __ASSEMBLY__
#include <asm/page.h>
#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
@@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void);
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
return (void *)vd + PAGE_SIZE;
return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
}
#endif

include/linux/timecounter.h

@@ -22,7 +22,7 @@
*
* @read: returns the current cycle value
* @mask: bitmask for two's complement
* subtraction of non 64 bit counters,
* subtraction of non-64-bit counters,
* see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
@@ -35,7 +35,7 @@ struct cyclecounter {
};
/**
* struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
* struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
* Contains the state needed by timecounter_read() to detect
* cycle counter wrap around. Initialize with
* timecounter_init(). Also used to convert cycle counts into the
@@ -66,6 +66,8 @@ struct timecounter {
* @cycles: Cycles
* @mask: bit mask for maintaining the 'frac' field
* @frac: pointer to storage for the fractional nanoseconds.
*
* Returns: cycle counter cycles converted to nanoseconds
*/
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
u64 cycles, u64 mask, u64 *frac)
@@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
/**
* timecounter_adjtime - Shifts the time of the clock.
* @tc: The &struct timecounter to adjust
* @delta: Desired change in nanoseconds.
*/
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
@@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc,
*
* In other words, keeps track of time since the same epoch as
* the function which generated the initial time stamp.
*
* Returns: nanoseconds since the initial time stamp
*/
extern u64 timecounter_read(struct timecounter *tc);
@@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc);
*
* This allows conversion of cycle counter values which were generated
* in the past.
*
* Returns: cycle counter converted to nanoseconds since the initial time stamp
*/
extern u64 timecounter_cyc2time(const struct timecounter *tc,
u64 cycle_tstamp);

include/linux/timekeeping.h

@@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
const struct timezone *tz);
/*
* ktime_get() family: read the current time in a multitude of ways,
* ktime_get() family - read the current time in a multitude of ways.
*
* The default time reference is CLOCK_MONOTONIC, starting at
* boot time but not counting the time spent in suspend.
* For other references, use the functions with "real", "clocktai",
* "boottime" and "raw" suffixes.
*
* To get the time in a different format, use the ones wit
* To get the time in a different format, use the ones with
* "ns", "ts64" and "seconds" suffix.
*
* See Documentation/core-api/timekeeping.rst for more details.
@@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
*
* Returns: real (wall) time in ktime_t format
*/
static inline ktime_t ktime_get_real(void)
{
@@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void)
}
/**
* ktime_get_boottime - Returns monotonic time since boot in ktime_t format
* ktime_get_boottime - Get monotonic time since boot in ktime_t format
*
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
* time spent in suspend.
*
* Returns: monotonic time since boot in ktime_t format
*/
static inline ktime_t ktime_get_boottime(void)
{
@@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void)
}
/**
* ktime_get_clocktai - Returns the TAI time of day in ktime_t format
* ktime_get_clocktai - Get the TAI time of day in ktime_t format
*
* Returns: the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
@@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void)
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
* @mono: monotonic time to convert
*
* Returns: time converted to realtime clock
*/
static inline ktime_t ktime_mono_to_real(ktime_t mono)
{
return ktime_mono_to_any(mono, TK_OFFS_REAL);
}
/**
* ktime_get_ns - Get the current time in nanoseconds
*
* Returns: current time converted to nanoseconds
*/
static inline u64 ktime_get_ns(void)
{
return ktime_to_ns(ktime_get());
}
/**
* ktime_get_real_ns - Get the current real/wall time in nanoseconds
*
* Returns: current real time converted to nanoseconds
*/
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
/**
* ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
*
* Returns: current boottime converted to nanoseconds
*/
static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
/**
* ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
*
* Returns: current TAI time converted to nanoseconds
*/
static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
/**
* ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
*
* Returns: current raw monotonic time converted to nanoseconds
*/
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
@@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
/*
* struct ktime_timestanps - Simultaneous mono/boot/real timestamps
/**
* struct ktime_timestamps - Simultaneous mono/boot/real timestamps
* @mono: Monotonic timestamp
* @boot: Boottime timestamp
* @real: Realtime timestamp
@@ -242,7 +276,8 @@ struct ktime_timestamps {
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
* @raw: Monotonic raw system time
* @clock_was_set_seq: The sequence number of clock was set events
* @cs_id: Clocksource ID
* @clock_was_set_seq: The sequence number of clock-was-set events
* @cs_was_changed_seq: The sequence number of clocksource change events
*/
struct system_time_snapshot {

include/linux/timer.h

@@ -22,7 +22,7 @@
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
/**
/*
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
* system is busy, but will not cause a CPU to come out of idle just
* to service it; instead, the timer will be serviced when the CPU
@@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { }
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
* return value: 1 if the timer is pending, 0 if not.
* Returns: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
@@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer);
* See timer_delete_sync() for detailed explanation.
*
* Do not use in new code. Use timer_delete_sync() instead.
*
* Returns:
* * %0 - The timer was not pending
* * %1 - The timer was pending and deactivated
*/
static inline int del_timer_sync(struct timer_list *timer)
{
@@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer)
* See timer_delete() for detailed explanation.
*
* Do not use in new code. Use timer_delete() instead.
*
* Returns:
* * %0 - The timer was not pending
* * %1 - The timer was pending and deactivated
*/
static inline int del_timer(struct timer_list *timer)
{

include/vdso/datapage.h

@@ -19,12 +19,6 @@
#include <vdso/time32.h>
#include <vdso/time64.h>
#ifdef CONFIG_ARM64
#include <asm/page-def.h>
#else
#include <asm/page.h>
#endif
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
@@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
*/
union vdso_data_store {
struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
u8 page[1U << CONFIG_PAGE_SHIFT];
};
/*

kernel/time/tick-sched.c

@@ -697,6 +697,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu)
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
* @now: current ktime_t
*
* Called from interrupt entry when the CPU was idle
*
@@ -794,7 +795,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
* Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
@@ -820,7 +821,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
* Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
@@ -1287,6 +1288,8 @@ void tick_nohz_irq_exit(void)
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
*
* Return: %true if the tick handler has run, otherwise %false
*/
bool tick_nohz_idle_got_tick(void)
{
@@ -1305,6 +1308,8 @@ bool tick_nohz_idle_got_tick(void)
* stopped, it returns the next hrtimer.
*
* Called from power state control code with interrupts disabled
*
* Return: the next expiration time
*/
ktime_t tick_nohz_get_next_hrtimer(void)
{
@@ -1320,6 +1325,8 @@ ktime_t tick_nohz_get_next_hrtimer(void)
* The return value of this function and/or the value returned by it through the
* @delta_next pointer can be negative which must be taken into account by its
* callers.
*
* Return: the expected length of the current sleep
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
@@ -1357,8 +1364,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
* @cpu: target CPU number
*
* Called from the schedutil frequency scaling governor in scheduler context.
*
* Return: the current idle calls counter value for @cpu
*/
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
@@ -1371,6 +1381,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
* tick_nohz_get_idle_calls - return the current idle calls counter value
*
* Called from the schedutil frequency scaling governor in scheduler context.
*
* Return: the current idle calls counter value for the current CPU
*/
unsigned long tick_nohz_get_idle_calls(void)
{
@@ -1559,7 +1571,7 @@ early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
* @mode: tick_nohz_mode to setup for
* @hrtimer: whether to use the hrtimer or not
*/
void tick_setup_sched_timer(bool hrtimer)
{

kernel/time/tick-sched.h

@@ -46,8 +46,8 @@ struct tick_device {
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_waketime: Time when the idle was interrupted
* @idle_sleeptime_seq: sequence counter for data consistency
* @idle_entrytime: Time when the idle call was entered
* @nohz_mode: Mode - one state of tick_nohz_mode
* @last_jiffies: Base jiffies snapshot when next event was last computed
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)

kernel/time/timer.c

@@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64);
/*
* The timer wheel has LVL_DEPTH array levels. Each level provides an array of
* LVL_SIZE buckets. Each level is driven by its own clock and therefor each
* LVL_SIZE buckets. Each level is driven by its own clock and therefore each
* level has a different granularity.
*
* The level granularity is: LVL_CLK_DIV ^ lvl
* The level granularity is: LVL_CLK_DIV ^ level
* The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
*
* The array level of a newly armed timer depends on the relative expiry
* time. The farther the expiry time is away the higher the array level and
* therefor the granularity becomes.
* therefore the granularity becomes.
*
* Contrary to the original timer wheel implementation, which aims for 'exact'
* expiry of the timers, this implementation removes the need for recascading
@@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64);
* struct timer_base - Per CPU timer base (number of base depends on config)
* @lock: Lock protecting the timer_base
* @running_timer: When expiring timers, the lock is dropped. To make
* sure not to race agains deleting/modifying a
* sure not to race against deleting/modifying a
* currently running timer, the pointer is set to the
* timer, which expires at the moment. If no timer is
* running, the pointer is NULL.
@@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr)
}
/*
* fixup_init is called when:
* timer_fixup_init is called when:
* - an active object is initialized
*/
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
@@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused)
}
/*
* fixup_activate is called when:
* timer_fixup_activate is called when:
* - an active object is activated
* - an unknown non-static object is activated
*/
@@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
}
/*
* fixup_free is called when:
* timer_fixup_free is called when:
* - an active object is freed
*/
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
@@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
}
/*
* fixup_assert_init is called when:
* timer_fixup_assert_init is called when:
* - an untracked/uninit-ed object is found
*/
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer,
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
*
* init_timer_key() must be done to a timer prior calling *any* of the
* init_timer_key() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
void init_timer_key(struct timer_list *timer,
@@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown)
* If @shutdown is set then the lock has to be taken whether the
* timer is pending or not to protect against a concurrent rearm
* which might hit between the lockless pending check and the lock
* aquisition. By taking the lock it is ensured that such a newly
* acquisition. By taking the lock it is ensured that such a newly
* enqueued timer is dequeued and cannot end up with
* timer->function == NULL in the expiry code.
*
@@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
/*
* When timer base is not set idle, undo the effect of
* tmigr_cpu_deactivate() to prevent inconsitent states - active
* tmigr_cpu_deactivate() to prevent inconsistent states - active
* timer base but inactive timer migration hierarchy.
*
* When timer base was already marked idle, nothing will be

kernel/time/timer_migration.c

@@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
first_childevt = evt = data->evt;
/*
* Walking the hierarchy is required in any case when a
* remote expiry was done before. This ensures to not lose
* already queued events in non active groups (see section
* "Required event and timerqueue update after a remote
* expiry" in the documentation at the top).
*
* The two call sites which are executed without a remote expiry
* before, are not prevented from propagating changes through
* the hierarchy by the return:
* - When entering this path by tmigr_new_timer(), @evt->ignore
* is never set.
* - tmigr_inactive_up() takes care of the propagation by
* itself and ignores the return value. But an immediate
* return is possible if there is a parent, sparing group
* locking at this level, because the upper walking call to
* the parent will take care about removing this event from
* within the group and update next_expiry accordingly.
*
* However if there is no parent, ie: the hierarchy has only a
* single level so @group is the top level group, make sure the
* first event information of the group is updated properly and
* also handled properly, so skip this fast return path.
*/
if (evt->ignore && !remote && group->parent)
return true;
raw_spin_lock(&group->lock);
childstate.state = 0;
@@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
* queue when the expiry time changed only or when it could be ignored.
*/
if (timerqueue_node_queued(&evt->nextevt)) {
if ((evt->nextevt.expires == nextexp) && !evt->ignore)
if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
/* Make sure not to miss a new CPU event with the same expiry */
evt->cpu = first_childevt->cpu;
goto check_toplvl;
}
if (!timerqueue_del(&group->events, &evt->nextevt))
WRITE_ONCE(group->next_expiry, KTIME_MAX);