fbe96f29ce
The goal of this patch is to include more information about the host environment into the perf.data so it is more self-descriptive. Overtime, profiles are captured on various machines and it becomes hard to track what was recorded, on what machine and when. This patch provides a way to solve this by extending the perf.data file with basic information about the host machine. To add those extensions, we leverage the feature bits capabilities of the perf.data format. The change is backward compatible with existing perf.data files. We define the following useful new extensions: - HEADER_HOSTNAME: the hostname - HEADER_OSRELEASE: the kernel release number - HEADER_ARCH: the hw architecture - HEADER_CPUDESC: generic CPU description - HEADER_NRCPUS: number of online/avail cpus - HEADER_CMDLINE: perf command line - HEADER_VERSION: perf version - HEADER_TOPOLOGY: cpu topology - HEADER_EVENT_DESC: full event description (attrs) - HEADER_CPUID: easy-to-parse low level CPU identication The small granularity for the entries is to make it easier to extend without breaking backward compatiblity. Many entries are provided as ASCII strings. Perf report/script have been modified to print the basic information as easy-to-parse ASCII strings. Extended information about CPU and NUMA topology may be requested with the -I option. Thanks to David Ahern for reviewing and testing the many versions of this patch. $ perf report --stdio # ======== # captured on : Mon Sep 26 15:22:14 2011 # hostname : quad # os release : 3.1.0-rc4-tip # perf version : 3.1.0-rc4 # arch : x86_64 # nrcpus online : 4 # nrcpus avail : 4 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz # cpuid : GenuineIntel,6,15,11 # total memory :8105360
kB # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31, # HEADER_CPU_TOPOLOGY info available, use -I to display # HEADER_NUMA_TOPOLOGY info available, use -I to display # ======== # ... $ perf report --stdio -I # ======== # captured on : Mon Sep 26 15:22:14 2011 # hostname : quad # os release : 3.1.0-rc4-tip # perf version : 3.1.0-rc4 # arch : x86_64 # nrcpus online : 4 # nrcpus avail : 4 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz # cpuid : GenuineIntel,6,15,11 # total memory :8105360
kB # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31, # sibling cores : 0-3 # sibling threads : 0 # sibling threads : 1 # sibling threads : 2 # sibling threads : 3 # node0 meminfo : total = 8320608 kB, free = 7571024 kB # node0 cpu list : 0-3 # ======== # ... Reviewed-by: David Ahern <dsahern@gmail.com> Tested-by: David Ahern <dsahern@gmail.com> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert Richter <robert.richter@amd.com> Cc: Andi Kleen <ak@linux.intel.com> Link: http://lkml.kernel.org/r/20110930134040.GA5575@quad Signed-off-by: Stephane Eranian <eranian@google.com> [ committer notes: Use --show-info in the tools as was in the docs, rename perf_header_fprintf_info to perf_file_section__fprintf_info, fixup conflict withf69b64f7
"perf: Support setting the disassembler style" ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
187 lines
4.4 KiB
C
187 lines
4.4 KiB
C
#ifndef _PERF_PERF_H
|
|
#define _PERF_PERF_H
|
|
|
|
struct winsize;
|
|
|
|
void get_term_dimensions(struct winsize *ws);
|
|
|
|
#if defined(__i386__)
|
|
#include "../../arch/x86/include/asm/unistd.h"
|
|
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
|
|
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
|
|
#define CPUINFO_PROC "model name"
|
|
#endif
|
|
|
|
#if defined(__x86_64__)
|
|
#include "../../arch/x86/include/asm/unistd.h"
|
|
#define rmb() asm volatile("lfence" ::: "memory")
|
|
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
|
|
#define CPUINFO_PROC "model name"
|
|
#endif
|
|
|
|
#ifdef __powerpc__
|
|
#include "../../arch/powerpc/include/asm/unistd.h"
|
|
#define rmb() asm volatile ("sync" ::: "memory")
|
|
#define cpu_relax() asm volatile ("" ::: "memory");
|
|
#define CPUINFO_PROC "cpu"
|
|
#endif
|
|
|
|
#ifdef __s390__
|
|
#include "../../arch/s390/include/asm/unistd.h"
|
|
#define rmb() asm volatile("bcr 15,0" ::: "memory")
|
|
#define cpu_relax() asm volatile("" ::: "memory");
|
|
#endif
|
|
|
|
#ifdef __sh__
|
|
#include "../../arch/sh/include/asm/unistd.h"
|
|
#if defined(__SH4A__) || defined(__SH5__)
|
|
# define rmb() asm volatile("synco" ::: "memory")
|
|
#else
|
|
# define rmb() asm volatile("" ::: "memory")
|
|
#endif
|
|
#define cpu_relax() asm volatile("" ::: "memory")
|
|
#define CPUINFO_PROC "cpu type"
|
|
#endif
|
|
|
|
#ifdef __hppa__
|
|
#include "../../arch/parisc/include/asm/unistd.h"
|
|
#define rmb() asm volatile("" ::: "memory")
|
|
#define cpu_relax() asm volatile("" ::: "memory");
|
|
#define CPUINFO_PROC "cpu"
|
|
#endif
|
|
|
|
#ifdef __sparc__
|
|
#include "../../arch/sparc/include/asm/unistd.h"
|
|
#define rmb() asm volatile("":::"memory")
|
|
#define cpu_relax() asm volatile("":::"memory")
|
|
#define CPUINFO_PROC "cpu"
|
|
#endif
|
|
|
|
#ifdef __alpha__
|
|
#include "../../arch/alpha/include/asm/unistd.h"
|
|
#define rmb() asm volatile("mb" ::: "memory")
|
|
#define cpu_relax() asm volatile("" ::: "memory")
|
|
#define CPUINFO_PROC "cpu model"
|
|
#endif
|
|
|
|
#ifdef __ia64__
|
|
#include "../../arch/ia64/include/asm/unistd.h"
|
|
#define rmb() asm volatile ("mf" ::: "memory")
|
|
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
|
|
#define CPUINFO_PROC "model name"
|
|
#endif
|
|
|
|
#ifdef __arm__
|
|
#include "../../arch/arm/include/asm/unistd.h"
|
|
/*
|
|
* Use the __kuser_memory_barrier helper in the CPU helper page. See
|
|
* arch/arm/kernel/entry-armv.S in the kernel source for details.
|
|
*/
|
|
#define rmb() ((void(*)(void))0xffff0fa0)()
|
|
#define cpu_relax() asm volatile("":::"memory")
|
|
#define CPUINFO_PROC "Processor"
|
|
#endif
|
|
|
|
#ifdef __mips__
|
|
#include "../../arch/mips/include/asm/unistd.h"
|
|
#define rmb() asm volatile( \
|
|
".set mips2\n\t" \
|
|
"sync\n\t" \
|
|
".set mips0" \
|
|
: /* no output */ \
|
|
: /* no input */ \
|
|
: "memory")
|
|
#define cpu_relax() asm volatile("" ::: "memory")
|
|
#define CPUINFO_PROC "cpu model"
|
|
#endif
|
|
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <sys/types.h>
|
|
#include <sys/syscall.h>
|
|
|
|
#include "../../include/linux/perf_event.h"
|
|
#include "util/types.h"
|
|
#include <stdbool.h>
|
|
|
|
struct perf_mmap {
|
|
void *base;
|
|
int mask;
|
|
unsigned int prev;
|
|
};
|
|
|
|
static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
|
|
{
|
|
struct perf_event_mmap_page *pc = mm->base;
|
|
int head = pc->data_head;
|
|
rmb();
|
|
return head;
|
|
}
|
|
|
|
static inline void perf_mmap__write_tail(struct perf_mmap *md,
|
|
unsigned long tail)
|
|
{
|
|
struct perf_event_mmap_page *pc = md->base;
|
|
|
|
/*
|
|
* ensure all reads are done before we write the tail out.
|
|
*/
|
|
/* mb(); */
|
|
pc->data_tail = tail;
|
|
}
|
|
|
|
/*
|
|
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
|
|
* counters in the current task.
|
|
*/
|
|
#define PR_TASK_PERF_EVENTS_DISABLE 31
|
|
#define PR_TASK_PERF_EVENTS_ENABLE 32
|
|
|
|
#ifndef NSEC_PER_SEC
|
|
# define NSEC_PER_SEC 1000000000ULL
|
|
#endif
|
|
|
|
static inline unsigned long long rdclock(void)
|
|
{
|
|
struct timespec ts;
|
|
|
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
|
|
}
|
|
|
|
/*
|
|
* Pick up some kernel type conventions:
|
|
*/
|
|
#define __user
|
|
#define asmlinkage
|
|
|
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
|
#define min(x, y) ({ \
|
|
typeof(x) _min1 = (x); \
|
|
typeof(y) _min2 = (y); \
|
|
(void) (&_min1 == &_min2); \
|
|
_min1 < _min2 ? _min1 : _min2; })
|
|
|
|
static inline int
|
|
sys_perf_event_open(struct perf_event_attr *attr,
|
|
pid_t pid, int cpu, int group_fd,
|
|
unsigned long flags)
|
|
{
|
|
attr->size = sizeof(*attr);
|
|
return syscall(__NR_perf_event_open, attr, pid, cpu,
|
|
group_fd, flags);
|
|
}
|
|
|
|
#define MAX_COUNTERS 256
|
|
#define MAX_NR_CPUS 256
|
|
|
|
struct ip_callchain {
|
|
u64 nr;
|
|
u64 ips[0];
|
|
};
|
|
|
|
extern bool perf_host, perf_guest;
|
|
extern const char perf_version_string[];
|
|
|
|
#endif
|