perf tools: Fix sparse CPU numbering related bugs
At present, the perf subcommands that do system-wide monitoring
(perf stat, perf record and perf top) don't work properly unless
the online cpus are numbered 0, 1, ..., N-1. These tools ask
for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN)
and then try to create events for cpus 0, 1, ..., N-1.
This creates problems for systems where the online cpus are
numbered sparsely. For example, a POWER6 system in
single-threaded mode (i.e. only running 1 hardware thread per
core) will have only even-numbered cpus online.
This fixes the problem by reading the /sys/devices/system/cpu/online
file to find out which cpus are online. The code that does that is in
tools/perf/util/cpumap.[ch], and consists of a read_cpu_map()
function that sets up a cpumap[] array and returns the number of
online cpus. If /sys/devices/system/cpu/online can't be read or
can't be parsed successfully, it falls back to using sysconf to
ask how many cpus are online and sets up an identity map in cpumap[].
The perf record, perf stat and perf top code then calls
read_cpu_map() in the system-wide monitoring case (instead of
sysconf) and uses cpumap[] to get the cpu numbers to pass to
perf_event_open.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-10 02:36:09 -07:00
|
|
|
#ifndef __PERF_CPUMAP_H
|
|
|
|
#define __PERF_CPUMAP_H
|
|
|
|
|
2012-01-19 09:07:23 -07:00
|
|
|
#include <stdio.h>
|
2012-09-26 08:41:14 -07:00
|
|
|
#include <stdbool.h>
|
2012-01-19 09:07:23 -07:00
|
|
|
|
2014-04-07 11:55:21 -07:00
|
|
|
#include "perf.h"
|
|
|
|
#include "util/debug.h"
|
|
|
|
|
2011-01-03 12:49:48 -07:00
|
|
|
/*
 * A counted list of integers: a length followed by a flexible array member.
 * The inline helpers in this header treat @nr as the number of valid
 * entries in @map and treat map[0] == -1 as marking an "empty" map.
 */
struct cpu_map {
	int	nr;	/* number of entries stored in map[] */
	int	map[];	/* the entries themselves (storage follows the struct) */
};
|
|
|
|
|
|
|
|
struct cpu_map *cpu_map__new(const char *cpu_list);
|
|
|
|
struct cpu_map *cpu_map__dummy_new(void);
|
2011-01-14 11:19:12 -07:00
|
|
|
void cpu_map__delete(struct cpu_map *map);
|
2012-09-10 00:53:50 -07:00
|
|
|
struct cpu_map *cpu_map__read(FILE *file);
|
2012-01-19 09:07:23 -07:00
|
|
|
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
|
2013-02-06 07:46:01 -07:00
|
|
|
int cpu_map__get_socket(struct cpu_map *map, int idx);
|
2013-02-14 05:57:29 -07:00
|
|
|
int cpu_map__get_core(struct cpu_map *map, int idx);
|
2013-02-06 07:46:01 -07:00
|
|
|
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
|
2013-02-14 05:57:29 -07:00
|
|
|
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
|
2013-02-06 07:46:01 -07:00
|
|
|
|
|
|
|
static inline int cpu_map__socket(struct cpu_map *sock, int s)
|
|
|
|
{
|
|
|
|
if (!sock || s > sock->nr || s < 0)
|
|
|
|
return 0;
|
|
|
|
return sock->map[s];
|
|
|
|
}
|
2012-01-19 09:07:23 -07:00
|
|
|
|
2013-02-14 05:57:29 -07:00
|
|
|
/*
 * Extract the socket part of a combined @id: everything above the low
 * 16 bits (the low 16 bits are what cpu_map__id_to_cpu() returns).
 */
static inline int cpu_map__id_to_socket(int id)
{
	const int socket_shift = 16;

	return id >> socket_shift;
}
|
|
|
|
|
|
|
|
/*
 * Extract the low 16 bits of a combined @id (the part that
 * cpu_map__id_to_socket() shifts away).
 */
static inline int cpu_map__id_to_cpu(int id)
{
	const int low16_mask = 0xffff;

	return id & low16_mask;
}
|
|
|
|
|
2012-09-26 08:41:14 -07:00
|
|
|
static inline int cpu_map__nr(const struct cpu_map *map)
|
|
|
|
{
|
|
|
|
return map ? map->nr : 1;
|
|
|
|
}
|
|
|
|
|
2013-05-22 17:42:38 -07:00
|
|
|
static inline bool cpu_map__empty(const struct cpu_map *map)
|
2012-09-26 08:41:14 -07:00
|
|
|
{
|
|
|
|
return map ? map->map[0] == -1 : true;
|
|
|
|
}
|
|
|
|
|
2014-04-07 11:55:21 -07:00
|
|
|
/*
 * Value reported by cpu__max_cpu(); while it is still zero that helper logs
 * "cpu_map not initialized".
 * NOTE(review): this is a definition (no `extern`) in a header, so every
 * translation unit that includes it emits its own tentative definition.
 * That only links when the toolchain merges common symbols -- confirm, or
 * move the definition into a .c file and declare it `extern` here.
 */
int max_cpu_num;
|
|
|
|
/*
 * Value reported by cpu__max_node(); while it is still zero that helper logs
 * "cpu_map not initialized".
 * NOTE(review): defined (not merely declared) in a header -- same
 * multiple-definition concern as any non-extern global placed here; confirm.
 */
int max_node_num;
|
|
|
|
/*
 * Table indexed by cpu number in cpu__get_node(); while it is NULL that
 * helper logs "cpu_map not initialized" and returns -1.
 * NOTE(review): presumably populated by cpu__setup_cpunode_map() -- confirm
 * against cpumap.c.  Also defined (not merely declared) in a header; confirm
 * the build tolerates one tentative definition per includer.
 */
int *cpunode_map;
|
|
|
|
|
|
|
|
int cpu__setup_cpunode_map(void);
|
|
|
|
|
|
|
|
static inline int cpu__max_node(void)
|
|
|
|
{
|
|
|
|
if (unlikely(!max_node_num))
|
|
|
|
pr_debug("cpu_map not initialized\n");
|
|
|
|
|
|
|
|
return max_node_num;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int cpu__max_cpu(void)
|
|
|
|
{
|
|
|
|
if (unlikely(!max_cpu_num))
|
|
|
|
pr_debug("cpu_map not initialized\n");
|
|
|
|
|
|
|
|
return max_cpu_num;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int cpu__get_node(int cpu)
|
|
|
|
{
|
|
|
|
if (unlikely(cpunode_map == NULL)) {
|
|
|
|
pr_debug("cpu_map not initialized\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return cpunode_map[cpu];
|
|
|
|
}
|
|
|
|
|
perf tools: Fix sparse CPU numbering related bugs
At present, the perf subcommands that do system-wide monitoring
(perf stat, perf record and perf top) don't work properly unless
the online cpus are numbered 0, 1, ..., N-1. These tools ask
for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN)
and then try to create events for cpus 0, 1, ..., N-1.
This creates problems for systems where the online cpus are
numbered sparsely. For example, a POWER6 system in
single-threaded mode (i.e. only running 1 hardware thread per
core) will have only even-numbered cpus online.
This fixes the problem by reading the /sys/devices/system/cpu/online
file to find out which cpus are online. The code that does that is in
tools/perf/util/cpumap.[ch], and consists of a read_cpu_map()
function that sets up a cpumap[] array and returns the number of
online cpus. If /sys/devices/system/cpu/online can't be read or
can't be parsed successfully, it falls back to using sysconf to
ask how many cpus are online and sets up an identity map in cpumap[].
The perf record, perf stat and perf top code then calls
read_cpu_map() in the system-wide monitoring case (instead of
sysconf) and uses cpumap[] to get the cpu numbers to pass to
perf_event_open.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-10 02:36:09 -07:00
|
|
|
#endif /* __PERF_CPUMAP_H */
|