1
linux/arch/x86/kernel
Peter Zijlstra 87e24f4b67 perf/x86: Fix local vs remote memory events for NHM/WSM
Verified using the below proglet.. before:

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
remote write

 Performance counter stats for './numa 0':

         2,101,554 node-stores
         2,096,931 node-store-misses

       5.021546079 seconds time elapsed

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
local write

 Performance counter stats for './numa 1':

           501,137 node-stores
               199 node-store-misses

       5.124451068 seconds time elapsed

After:

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
remote write

 Performance counter stats for './numa 0':

         2,107,516 node-stores
         2,097,187 node-store-misses

       5.012755149 seconds time elapsed

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
local write

 Performance counter stats for './numa 1':

         2,063,355 node-stores
               165 node-store-misses

       5.082091494 seconds time elapsed

#define _GNU_SOURCE

#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <dirent.h>
#include <signal.h>
#include <unistd.h>
#include <numaif.h>
#include <stdlib.h>

#define SIZE (32*1024*1024)

volatile int done;

void sig_done(int sig)
{
	done = 1;
}

int main(int argc, char **argv)
{
	cpu_set_t *mask, *mask2;
	size_t size;
	int i, err, t;
	int nrcpus = 1024;
	char *mem;
	unsigned long nodemask = 0x01; /* node 0 */
	DIR *node;
	struct dirent *de;
	int read = 0;
	int local = 0;

	if (argc < 2) {
		printf("usage: %s [0-3]\n", argv[0]);
		printf("  bit0 - local/remote\n");
		printf("  bit1 - read/write\n");
		exit(0);
	}

	switch (atoi(argv[1])) {
	case 0:
		printf("remote write\n");
		break;
	case 1:
		printf("local write\n");
		local = 1;
		break;
	case 2:
		printf("remote read\n");
		read = 1;
		break;
	case 3:
		printf("local read\n");
		local = 1;
		read = 1;
		break;
	}

	mask = CPU_ALLOC(nrcpus);
	size = CPU_ALLOC_SIZE(nrcpus);
	CPU_ZERO_S(size, mask);

	node = opendir("/sys/devices/system/node/node0/");
	if (!node)
		perror("opendir");
	while ((de = readdir(node))) {
		int cpu;

		if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
			CPU_SET_S(cpu, size, mask);
	}
	closedir(node);

	mask2 = CPU_ALLOC(nrcpus);
	CPU_ZERO_S(size, mask2);
	for (i = 0; i < size; i++)
		CPU_SET_S(i, size, mask2);
	CPU_XOR_S(size, mask2, mask2, mask); // invert

	if (!local)
		mask = mask2;

	err = sched_setaffinity(0, size, mask);
	if (err)
		perror("sched_setaffinity");

	mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE);
	if (err)
		perror("mbind");

	signal(SIGALRM, sig_done);
	alarm(5);

	if (!read) {
		while (!done) {
			for (i = 0; i < SIZE; i++)
				mem[i] = 0x01;
		}
	} else {
		while (!done) {
			for (i = 0; i < SIZE; i++)
				t += *(volatile char *)(mem + i);
		}
	}

	return 0;
}

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-03-12 20:43:41 +01:00
..
acpi x86, acpi: Skip acpi x2apic entries if the x2apic feature is not present 2011-12-23 11:00:50 -08:00
apic x86, UV: Update Boot messages for SGI UV2 platform 2012-01-08 12:35:44 +01:00
cpu perf/x86: Fix local vs remote memory events for NHM/WSM 2012-03-12 20:43:41 +01:00
.gitignore
alternative.c x86: Call stop_machine_text_poke() on all CPUs 2011-11-14 13:05:15 +01:00
amd_gart_64.c doc: fix broken references 2011-09-27 18:08:04 +02:00
amd_nb.c Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci 2012-01-11 18:50:26 -08:00
apb_timer.c Merge branch 'timers-clocksource-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2011-07-23 10:34:47 -07:00
aperture_64.c memblock, x86: Replace memblock_x86_reserve/free_range() with generic ones 2011-07-14 11:47:53 -07:00
apm_32.c module_param: make bool parameters really bool (arch) 2012-01-13 09:32:18 +10:30
asm-offsets_32.c x86: Generate system call tables and unistd_*.h from tables 2011-11-17 13:35:37 -08:00
asm-offsets_64.c x86: Generate system call tables and unistd_*.h from tables 2011-11-17 13:35:37 -08:00
asm-offsets.c x86, efi: EFI boot stub support 2011-12-12 14:26:10 -08:00
audit_64.c
bootflag.c
check.c memblock, x86: Replace memblock_x86_reserve/free_range() with generic ones 2011-07-14 11:47:53 -07:00
cpuid.c switch device_get_devnode() and ->devnode() to umode_t * 2012-01-03 22:54:55 -05:00
crash_dump_32.c crash_dump: export is_kdump_kernel to modules, consolidate elfcorehdr_addr, setup_elfcorehdr and saved_max_pfn 2011-03-23 19:47:19 -07:00
crash_dump_64.c crash_dump: export is_kdump_kernel to modules, consolidate elfcorehdr_addr, setup_elfcorehdr and saved_max_pfn 2011-03-23 19:47:19 -07:00
crash.c x86, nmi: Wire up NMI handlers to new routines 2011-10-10 06:56:57 +02:00
devicetree.c x86: Fix files explicitly requiring export.h for EXPORT_SYMBOL/THIS_MODULE 2011-10-31 19:30:35 -04:00
doublefault_32.c
dumpstack_32.c x86, dumpstack: Fix code bytes breakage due to missing KERN_CONT 2011-12-19 13:09:56 -08:00
dumpstack_64.c Merge branches 'core-urgent-for-linus', 'perf-urgent-for-linus', 'sched-urgent-for-linus' and 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-02-02 11:11:13 -08:00
dumpstack.c bugs, x86: Fix printk levels for panic, softlockups and stack dumps 2012-01-26 21:28:45 +01:00
e820.c Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux 2012-01-18 15:51:48 -08:00
early_printk.c Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-01-11 19:13:40 -08:00
early-quirks.c x86, quirk: Fix SB600 revision check 2011-03-16 14:03:32 +01:00
entry_32.S Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit 2012-01-17 16:41:31 -08:00
entry_64.S x86: Specify a size for the cmp in the NMI handler 2012-02-20 19:45:26 -05:00
ftrace.c x86/ftrace: Fix compiler warning in ftrace.c 2011-05-25 19:56:26 -04:00
head32.c memblock: Kill memblock_init() 2011-12-08 10:22:07 -08:00
head64.c memblock: Kill memblock_init() 2011-12-08 10:22:07 -08:00
head_32.S Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2011-03-15 20:01:36 -07:00
head_64.S x86: Keep current stack in NMI breakpoints 2011-12-21 15:38:55 -05:00
head.c memblock, x86: Replace memblock_x86_reserve/free_range() with generic ones 2011-07-14 11:47:53 -07:00
hpet.c Merge branch 'driver-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core 2012-01-07 12:03:30 -08:00
hw_breakpoint.c
i386_ksyms_32.c
i387.c treewide: fix potentially dangerous trailing ';' in #defined values/expressions 2011-07-21 14:10:00 +02:00
i8237.c x86: Use syscore_ops instead of sysdev classes and sysdevs 2011-03-23 22:15:54 +01:00
i8253.c x86: Use common i8253 clockevent 2011-07-01 10:37:14 +02:00
i8259.c atomic: use <linux/atomic.h> 2011-07-26 16:49:47 -07:00
init_task.c
io_delay.c
ioport.c
irq_32.c x86: Fix the 32-bit stackoverflow-debug build 2011-12-05 12:25:44 +01:00
irq_64.c x86: Add stack top margin for stack overflow checking 2011-12-07 09:27:11 +01:00
irq_work.c
irq.c Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-01-06 13:58:21 -08:00
irqinit.c driver-core: remove sysdev.h usage. 2011-12-21 16:26:03 -08:00
jump_label.c jump_label, x86: Fix section mismatch 2011-12-06 20:41:02 +01:00
kdebugfs.c
kgdb.c x86, nmi: Wire up NMI handlers to new routines 2011-10-10 06:56:57 +02:00
kprobes.c Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2011-10-26 17:03:38 +02:00
kvm.c KVM guest: remove KVM guest pv mmu support 2011-12-27 11:17:08 +02:00
kvmclock.c KVM guest: prevent tracing recursion with kvmclock 2011-11-20 10:53:48 +02:00
ldt.c
machine_kexec_32.c
machine_kexec_64.c
Makefile Merge branch 'x86-syscall-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-01-16 18:19:19 -08:00
mca_32.c x86: Fix common misspellings 2011-03-18 10:39:30 +01:00
microcode_amd.c x86/microcode: Remove noisy AMD microcode warning 2012-02-07 10:53:42 +01:00
microcode_core.c Merge branch 'driver-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core 2012-01-07 12:03:30 -08:00
microcode_intel.c x86, intel: Output microcode revision in /proc/cpuinfo 2011-10-14 13:16:35 +02:00
mmconf-fam10h_64.c
module.c modules: make arch's use default loader hooks 2011-07-24 22:06:04 +09:30
mpparse.c Merge branch 'memblock-kill-early_node_map' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/memblock 2011-12-20 12:14:26 +01:00
msr.c switch device_get_devnode() and ->devnode() to umode_t * 2012-01-03 22:54:55 -05:00
nmi_selftest.c x86, NMI: Add to_cpumask() to silence compile warning 2011-12-07 23:26:57 +01:00
nmi.c x86: Allow NMIs to hit breakpoints in i386 2011-12-21 15:38:55 -05:00
paravirt_patch_32.c
paravirt_patch_64.c
paravirt-spinlocks.c
paravirt.c Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip 2011-08-12 20:46:24 -07:00
pci-calgary_64.c treewide: Convert uses of struct resource to resource_size(ptr) 2011-06-10 14:55:36 +02:00
pci-dma.c iommu: Add option to group multi-function devices 2011-11-15 12:22:31 +01:00
pci-iommu_table.c arch/x86/kernel/pci-iommu_table.c: Convert sprintf_symbol to %pS 2011-05-10 10:21:35 +02:00
pci-nommu.c
pci-swiotlb.c
pcspeaker.c
probe_roms.c x86: Fix files explicitly requiring export.h for EXPORT_SYMBOL/THIS_MODULE 2011-10-31 19:30:35 -04:00
process_32.c i387: support lazy restore of FPU state 2012-02-20 10:58:54 -08:00
process_64.c i387: support lazy restore of FPU state 2012-02-20 10:58:54 -08:00
process.c x86: Fix rflags in FAKE_STACK_FRAME 2011-12-06 10:02:38 +01:00
ptrace.c audit: inline audit_syscall_entry to reduce burden on archs 2012-01-17 16:16:56 -05:00
pvclock.c
quirks.c x86, amd: Fix up numa_node information for AMD CPU family 15h model 0-0fh northbridge functions 2011-12-05 18:13:11 +01:00
reboot_32.S x86, reboot: Fix relocations in reboot_32.S 2011-05-02 14:44:46 -07:00
reboot_fixups_32.c
reboot.c x86/reboot: Remove VersaLogic Menlow reboot quirk 2012-01-30 10:52:33 +01:00
relocate_kernel_32.S kexec, x86: Fix incorrect jump back address if not preserving context 2011-07-21 11:19:28 +02:00
relocate_kernel_64.S kexec, x86: Fix incorrect jump back address if not preserving context 2011-07-21 11:19:28 +02:00
resource.c
rtc.c x86/rtc, mrst: Don't register a platform RTC device for for Intel MID platforms 2011-12-05 17:09:21 +01:00
setup_percpu.c
setup.c Merge branch 'x86-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-01-11 19:12:33 -08:00
signal.c signal: add block_sigmask() for adding sigmask to current->blocked 2012-01-10 16:30:54 -08:00
smp.c x86, reboot: Fix typo in nmi reboot path 2012-01-07 12:19:37 +01:00
smpboot.c Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-01-11 19:13:40 -08:00
stacktrace.c x86: Swap save_stack_trace_regs parameters 2011-06-14 22:48:51 -04:00
step.c x86-64: Add user_64bit_mode paravirt op 2011-08-04 16:13:49 -07:00
sys_i386_32.c
sys_x86_64.c x86, amd: Include linux/elf.h since we use stuff from asm/elf.h 2011-09-28 10:34:31 +02:00
syscall_32.c x86, syscall: Re-fix typo in comment 2011-11-18 16:25:07 -08:00
syscall_64.c x86: Generate system call tables and unistd_*.h from tables 2011-11-17 13:35:37 -08:00
tboot.c x86: Fix files explicitly requiring export.h for EXPORT_SYMBOL/THIS_MODULE 2011-10-31 19:30:35 -04:00
tce_64.c
test_nx.c x86: Eliminate various 'set but not used' warnings 2011-05-21 19:10:33 +02:00
test_rodata.c
time.c x86: Fix files explicitly requiring export.h for EXPORT_SYMBOL/THIS_MODULE 2011-10-31 19:30:35 -04:00
tls.c
tls.h
topology.c x86: Fix files explicitly requiring export.h for EXPORT_SYMBOL/THIS_MODULE 2011-10-31 19:30:35 -04:00
trampoline_32.S
trampoline_64.S
trampoline.c memblock, x86: Replace memblock_x86_reserve/free_range() with generic ones 2011-07-14 11:47:53 -07:00
traps.c i387: use 'restore_fpu_checking()' directly in task switching code 2012-02-20 10:58:28 -08:00
tsc_sync.c x86, tsc: Skip TSC synchronization checks for tsc=reliable 2011-12-05 18:00:31 +01:00
tsc.c Merge remote-tracking branch 'linus/master' into x86/urgent 2012-01-19 12:56:50 -08:00
verify_cpu.S x86: Fix common misspellings 2011-03-18 10:39:30 +01:00
vm86_32.c x86-32: Fix build failure with AUDIT=y, AUDITSYSCALL=n 2012-01-17 18:10:11 -08:00
vmlinux.lds.S x86-64: Rework vsyscall emulation and add vsyscall= parameter 2011-08-10 19:26:46 -05:00
vsmp_64.c
vsyscall_64.c x86: Default to vsyscall=emulate 2011-12-05 12:17:29 +01:00
vsyscall_emu_64.S x86-64: Rework vsyscall emulation and add vsyscall= parameter 2011-08-10 19:26:46 -05:00
vsyscall_trace.h x86-64: Add vsyscall:emulate_vsyscall trace event 2011-08-04 16:13:53 -07:00
x86_init.c Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci 2012-01-11 18:50:26 -08:00
x8664_ksyms_64.c
xsave.c i387: move TS_USEDFPU flag from thread_info to task_struct 2012-02-18 10:19:41 -08:00