1
linux/include/linux/posix-timers.h
Eric Dumazet 8af088710d posix-timers: RCU conversion
Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard
a single spinlock (idr_lock) in posix-timers code, on its 48 core
machine.

Even on a 16 cpu machine (2x4x2), a single test can show 98% of cpu time
used in ticket_spin_lock, from lock_timer

Ref: http://www.spinics.net/lists/kvm/msg51526.html

Switching to RCU is quite easy, IDR being already RCU ready. idr_lock
should be locked only for an insert/delete, not a lookup.

Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime().

Before :

real    1m18.669s
user    0m1.346s
sys     1m17.180s

After :

real    0m3.296s
user    0m1.366s
sys     0m1.926s

Reported-by: Ben Nagy <ben@iagu.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Ben Nagy <ben@iagu.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Richard Cochran <richard.cochran@omicron.at>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2011-05-24 12:10:51 +02:00

130 lines
4.0 KiB
C

#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/alarmtimer.h>
union cpu_time_count {
cputime_t cpu;
unsigned long long sched;
};
struct cpu_timer_list {
struct list_head entry;
union cpu_time_count expires, incr;
struct task_struct *task;
int firing;
};
/*
* Bit fields within a clockid:
*
* The most significant 29 bits hold either a pid or a file descriptor.
*
* Bit 2 indicates whether a cpu clock refers to a thread or a process.
*
* Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
*
* A clockid is invalid if bits 2, 1, and 0 are all set.
*/
#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
#define CPUCLOCK_PERTHREAD(clock) \
(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
#define CPUCLOCK_PERTHREAD_MASK 4
#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
#define CPUCLOCK_CLOCK_MASK 3
#define CPUCLOCK_PROF 0
#define CPUCLOCK_VIRT 1
#define CPUCLOCK_SCHED 2
#define CPUCLOCK_MAX 3
#define CLOCKFD CPUCLOCK_MAX
#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
#define MAKE_THREAD_CPUCLOCK(tid, clock) \
MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK)
#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3))
/* POSIX.1b interval timer structure. */
struct k_itimer {
struct list_head list; /* free/ allocate list */
spinlock_t it_lock;
clockid_t it_clock; /* which timer type */
timer_t it_id; /* timer id */
int it_overrun; /* overrun on pending signal */
int it_overrun_last; /* overrun on last delivered signal */
int it_requeue_pending; /* waiting to requeue this timer */
#define REQUEUE_PENDING 1
int it_sigev_notify; /* notify word of sigevent struct */
struct signal_struct *it_signal;
union {
struct pid *it_pid; /* pid of process to send signal to */
struct task_struct *it_process; /* for clock_nanosleep */
};
struct sigqueue *sigq; /* signal queue entry. */
union {
struct {
struct hrtimer timer;
ktime_t interval;
} real;
struct cpu_timer_list cpu;
struct {
unsigned int clock;
unsigned int node;
unsigned long incr;
unsigned long expires;
} mmtimer;
struct alarm alarmtimer;
struct rcu_head rcu;
} it;
};
struct k_clock {
int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
int (*clock_set) (const clockid_t which_clock,
const struct timespec *tp);
int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
int (*timer_create) (struct k_itimer *timer);
int (*nsleep) (const clockid_t which_clock, int flags,
struct timespec *, struct timespec __user *);
long (*nsleep_restart) (struct restart_block *restart_block);
int (*timer_set) (struct k_itimer * timr, int flags,
struct itimerspec * new_setting,
struct itimerspec * old_setting);
int (*timer_del) (struct k_itimer * timr);
#define TIMER_RETRY 1
void (*timer_get) (struct k_itimer * timr,
struct itimerspec * cur_setting);
};
extern struct k_clock clock_posix_cpu;
extern struct k_clock clock_posix_dynamic;
void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
/* function to call to trigger timer event */
int posix_timer_event(struct k_itimer *timr, int si_private);
void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval);
long clock_nanosleep_restart(struct restart_block *restart_block);
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
#endif