1
linux/include/asm-generic/vmlinux.lds.h
Steven Rostedt 1f0d69a9fc tracing: profile likely and unlikely annotations
Impact: new unlikely/likely profiler

Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.

When configured, every(*) likely and unlikely macro gets a counter attached
to it. When the condition is hit, the hit and misses of that condition
are recorded. These numbers can later be retrieved by:

  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers.

# cat /debug/tracing/profile_unlikely | head
 correct incorrect  %        Function                  File              Line
 ------- ---------  -        --------                  ----              ----
    2167        0   0 do_arch_prctl                  process_64.c         832
       0        0   0 do_arch_prctl                  process_64.c         804
    2670        0   0 IS_ERR                         err.h                34
   71230     5693   7 __switch_to                    process_64.c         673
   76919        0   0 __switch_to                    process_64.c         639
   43184    33743  43 __switch_to                    process_64.c         624
   12740    64181  83 __switch_to                    process_64.c         594
   12740    64174  83 __switch_to                    process_64.c         590

# cat /debug/tracing/profile_unlikely | \
  awk '{ if ($3 > 25) print $0; }' |head -20
   44963    35259  43 __switch_to                    process_64.c         624
   12762    67454  84 __switch_to                    process_64.c         594
   12762    67447  84 __switch_to                    process_64.c         590
    1478      595  28 syscall_get_error              syscall.h            51
       0     2821 100 syscall_trace_leave            ptrace.c             1567
       0        1 100 native_smp_prepare_cpus        smpboot.c            1237
   86338   265881  75 calc_delta_fair                sched_fair.c         408
  210410   108540  34 calc_delta_mine                sched.c              1267
       0    54550 100 sched_info_queued              sched_stats.h        222
   51899    66435  56 pick_next_task_fair            sched_fair.c         1422
       6       10  62 yield_task_fair                sched_fair.c         982
    7325     2692  26 rt_policy                      sched.c              144
       0     1270 100 pre_schedule_rt                sched_rt.c           1261
    1268    48073  97 pick_next_task_rt              sched_rt.c           884
       0    45181 100 sched_info_dequeued            sched_stats.h        177
       0       15 100 sched_move_task                sched.c              8700
       0       15 100 sched_move_task                sched.c              8690
   53167    33217  38 schedule                       sched.c              4457
       0    80208 100 sched_info_switch              sched_stats.h        270
   30585    49631  61 context_switch                 sched.c              2619

# cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }'
   39900    36577  47 pick_next_task                 sched.c              4397
   20824    15233  42 switch_mm                      mmu_context_64.h     18
       0        7 100 __cancel_work_timer            workqueue.c          560
     617    66484  99 clocksource_adjust             timekeeping.c        456
       0   346340 100 audit_syscall_exit             auditsc.c            1570
      38   347350  99 audit_get_context              auditsc.c            732
       0   345244 100 audit_syscall_entry            auditsc.c            1541
      38     1017  96 audit_free                     auditsc.c            1446
       0     1090 100 audit_alloc                    auditsc.c            862
    2618     1090  29 audit_alloc                    auditsc.c            858
       0        6 100 move_masked_irq                migration.c          9
       1      198  99 probe_sched_wakeup             trace_sched_switch.c 58
       2        2  50 probe_wakeup                   trace_sched_wakeup.c 227
       0        2 100 probe_wakeup_sched_switch      trace_sched_wakeup.c 144
    4514     2090  31 __grab_cache_page              filemap.c            2149
   12882   228786  94 mapping_unevictable            pagemap.h            50
       4       11  73 __flush_cpu_slab               slub.c               1466
  627757   330451  34 slab_free                      slub.c               1731
    2959    61245  95 dentry_lru_del_init            dcache.c             153
     946     1217  56 load_elf_binary                binfmt_elf.c         904
     102       82  44 disk_put_part                  genhd.h              206
       1        1  50 dst_gc_task                    dst.c                82
       0       19 100 tcp_mss_split_point            tcp_output.c         1126

As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys. Note: the unlikely case had 71 hits
that were more than 25%.

Note:  After submitting my first version of this patch, Andrew Morton
  showed me a version written by Daniel Walker, where I picked up
  the following ideas from:

  1)  Using __builtin_constant_p to avoid profiling fixed values.
  2)  Using __FILE__ instead of instruction pointers.
  3)  Using the preprocessor to stop all profiling of likely
       annotations from vsyscall_64.c.

Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar
for their feed back on this patch.

(*) Not ever unlikely is recorded, those that are used by vsyscalls
 (a few of them) had to have profiling disabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-11-12 11:52:02 +01:00

425 lines
13 KiB
C

#ifndef LOAD_OFFSET
#define LOAD_OFFSET 0
#endif
#ifndef VMLINUX_SYMBOL
#define VMLINUX_SYMBOL(_sym_) _sym_
#endif
/* Align . to a 8 byte boundary equals to maximum function alignment. */
#define ALIGN_FUNCTION() . = ALIGN(8)
/* The actual configuration determine if the init/exit sections
* are handled as text/data or they can be discarded (which
* often happens at runtime)
*/
#ifdef CONFIG_HOTPLUG
#define DEV_KEEP(sec) *(.dev##sec)
#define DEV_DISCARD(sec)
#else
#define DEV_KEEP(sec)
#define DEV_DISCARD(sec) *(.dev##sec)
#endif
#ifdef CONFIG_HOTPLUG_CPU
#define CPU_KEEP(sec) *(.cpu##sec)
#define CPU_DISCARD(sec)
#else
#define CPU_KEEP(sec)
#define CPU_DISCARD(sec) *(.cpu##sec)
#endif
#if defined(CONFIG_MEMORY_HOTPLUG)
#define MEM_KEEP(sec) *(.mem##sec)
#define MEM_DISCARD(sec)
#else
#define MEM_KEEP(sec)
#define MEM_DISCARD(sec) *(.mem##sec)
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
#define MCOUNT_REC() VMLINUX_SYMBOL(__start_mcount_loc) = .; \
*(__mcount_loc) \
VMLINUX_SYMBOL(__stop_mcount_loc) = .;
#else
#define MCOUNT_REC()
#endif
#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \
*(_ftrace_likely) \
VMLINUX_SYMBOL(__stop_likely_profile) = .; \
VMLINUX_SYMBOL(__start_unlikely_profile) = .; \
*(_ftrace_unlikely) \
VMLINUX_SYMBOL(__stop_unlikely_profile) = .;
#else
#define LIKELY_PROFILE()
#endif
/* .data section */
#define DATA_DATA \
*(.data) \
*(.data.init.refok) \
*(.ref.data) \
DEV_KEEP(init.data) \
DEV_KEEP(exit.data) \
CPU_KEEP(init.data) \
CPU_KEEP(exit.data) \
MEM_KEEP(init.data) \
MEM_KEEP(exit.data) \
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___markers) = .; \
*(__markers) \
VMLINUX_SYMBOL(__stop___markers) = .; \
VMLINUX_SYMBOL(__start___tracepoints) = .; \
*(__tracepoints) \
VMLINUX_SYMBOL(__stop___tracepoints) = .; \
LIKELY_PROFILE()
#define RO_DATA(align) \
. = ALIGN((align)); \
.rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_rodata) = .; \
*(.rodata) *(.rodata.*) \
*(__vermagic) /* Kernel version magic */ \
*(__markers_strings) /* Markers: strings */ \
*(__tracepoints_strings)/* Tracepoints: strings */ \
} \
\
.rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
*(.rodata1) \
} \
\
BUG_TABLE \
\
/* PCI quirks */ \
.pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
*(.pci_fixup_early) \
VMLINUX_SYMBOL(__end_pci_fixups_early) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_header) = .; \
*(.pci_fixup_header) \
VMLINUX_SYMBOL(__end_pci_fixups_header) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_final) = .; \
*(.pci_fixup_final) \
VMLINUX_SYMBOL(__end_pci_fixups_final) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_enable) = .; \
*(.pci_fixup_enable) \
VMLINUX_SYMBOL(__end_pci_fixups_enable) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \
*(.pci_fixup_resume) \
VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \
*(.pci_fixup_resume_early) \
VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \
*(.pci_fixup_suspend) \
VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \
} \
\
/* Built-in firmware blobs */ \
.builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_builtin_fw) = .; \
*(.builtin_fw) \
VMLINUX_SYMBOL(__end_builtin_fw) = .; \
} \
\
/* RapidIO route ops */ \
.rio_route : AT(ADDR(.rio_route) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_rio_route_ops) = .; \
*(.rio_route_ops) \
VMLINUX_SYMBOL(__end_rio_route_ops) = .; \
} \
\
TRACEDATA \
\
/* Kernel symbol table: Normal symbols */ \
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab) = .; \
*(__ksymtab) \
VMLINUX_SYMBOL(__stop___ksymtab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
*(__ksymtab_gpl) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
*(__ksymtab_unused) \
VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
*(__ksymtab_unused_gpl) \
VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
*(__ksymtab_gpl_future) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
} \
\
/* Kernel symbol table: Normal symbols */ \
__kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab) = .; \
*(__kcrctab) \
VMLINUX_SYMBOL(__stop___kcrctab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \
*(__kcrctab_gpl) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \
*(__kcrctab_unused) \
VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \
*(__kcrctab_unused_gpl) \
VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \
*(__kcrctab_gpl_future) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
\
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
*(__ksymtab_strings) \
} \
\
/* __*init sections */ \
__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \
*(.ref.rodata) \
MCOUNT_REC() \
DEV_KEEP(init.rodata) \
DEV_KEEP(exit.rodata) \
CPU_KEEP(init.rodata) \
CPU_KEEP(exit.rodata) \
MEM_KEEP(init.rodata) \
MEM_KEEP(exit.rodata) \
} \
\
/* Built-in module parameters. */ \
__param : AT(ADDR(__param) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___param) = .; \
*(__param) \
VMLINUX_SYMBOL(__stop___param) = .; \
. = ALIGN((align)); \
VMLINUX_SYMBOL(__end_rodata) = .; \
} \
. = ALIGN((align));
/* RODATA provided for backward compatibility.
* All archs are supposed to use RO_DATA() */
#define RODATA RO_DATA(4096)
#define SECURITY_INIT \
.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
*(.security_initcall.init) \
VMLINUX_SYMBOL(__security_initcall_end) = .; \
}
/* .text section. Map to function alignment to avoid address changes
* during second ld run in second ld pass when generating System.map */
#define TEXT_TEXT \
ALIGN_FUNCTION(); \
*(.text.hot) \
*(.text) \
*(.ref.text) \
*(.text.init.refok) \
*(.exit.text.refok) \
DEV_KEEP(init.text) \
DEV_KEEP(exit.text) \
CPU_KEEP(init.text) \
CPU_KEEP(exit.text) \
MEM_KEEP(init.text) \
MEM_KEEP(exit.text) \
*(.text.unlikely)
/* sched.text is aling to function alignment to secure we have same
* address even at second ld pass when generating System.map */
#define SCHED_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__sched_text_start) = .; \
*(.sched.text) \
VMLINUX_SYMBOL(__sched_text_end) = .;
/* spinlock.text is aling to function alignment to secure we have same
* address even at second ld pass when generating System.map */
#define LOCK_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__lock_text_start) = .; \
*(.spinlock.text) \
VMLINUX_SYMBOL(__lock_text_end) = .;
#define KPROBES_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__kprobes_text_start) = .; \
*(.kprobes.text) \
VMLINUX_SYMBOL(__kprobes_text_end) = .;
/* Section used for early init (in .S files) */
#define HEAD_TEXT *(.head.text)
/* init and exit section handling */
#define INIT_DATA \
*(.init.data) \
DEV_DISCARD(init.data) \
DEV_DISCARD(init.rodata) \
CPU_DISCARD(init.data) \
CPU_DISCARD(init.rodata) \
MEM_DISCARD(init.data) \
MEM_DISCARD(init.rodata) \
/* implement dynamic printk debug */ \
VMLINUX_SYMBOL(__start___verbose_strings) = .; \
*(__verbose_strings) \
VMLINUX_SYMBOL(__stop___verbose_strings) = .; \
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___verbose) = .; \
*(__verbose) \
VMLINUX_SYMBOL(__stop___verbose) = .;
#define INIT_TEXT \
*(.init.text) \
DEV_DISCARD(init.text) \
CPU_DISCARD(init.text) \
MEM_DISCARD(init.text)
#define EXIT_DATA \
*(.exit.data) \
DEV_DISCARD(exit.data) \
DEV_DISCARD(exit.rodata) \
CPU_DISCARD(exit.data) \
CPU_DISCARD(exit.rodata) \
MEM_DISCARD(exit.data) \
MEM_DISCARD(exit.rodata)
#define EXIT_TEXT \
*(.exit.text) \
DEV_DISCARD(exit.text) \
CPU_DISCARD(exit.text) \
MEM_DISCARD(exit.text)
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to
the beginning of the section so we begin them at 0. */
#define DWARF_DEBUG \
/* DWARF 1 */ \
.debug 0 : { *(.debug) } \
.line 0 : { *(.line) } \
/* GNU DWARF 1 extensions */ \
.debug_srcinfo 0 : { *(.debug_srcinfo) } \
.debug_sfnames 0 : { *(.debug_sfnames) } \
/* DWARF 1.1 and DWARF 2 */ \
.debug_aranges 0 : { *(.debug_aranges) } \
.debug_pubnames 0 : { *(.debug_pubnames) } \
/* DWARF 2 */ \
.debug_info 0 : { *(.debug_info \
.gnu.linkonce.wi.*) } \
.debug_abbrev 0 : { *(.debug_abbrev) } \
.debug_line 0 : { *(.debug_line) } \
.debug_frame 0 : { *(.debug_frame) } \
.debug_str 0 : { *(.debug_str) } \
.debug_loc 0 : { *(.debug_loc) } \
.debug_macinfo 0 : { *(.debug_macinfo) } \
/* SGI/MIPS DWARF 2 extensions */ \
.debug_weaknames 0 : { *(.debug_weaknames) } \
.debug_funcnames 0 : { *(.debug_funcnames) } \
.debug_typenames 0 : { *(.debug_typenames) } \
.debug_varnames 0 : { *(.debug_varnames) } \
/* Stabs debugging sections. */
#define STABS_DEBUG \
.stab 0 : { *(.stab) } \
.stabstr 0 : { *(.stabstr) } \
.stab.excl 0 : { *(.stab.excl) } \
.stab.exclstr 0 : { *(.stab.exclstr) } \
.stab.index 0 : { *(.stab.index) } \
.stab.indexstr 0 : { *(.stab.indexstr) } \
.comment 0 : { *(.comment) }
#ifdef CONFIG_GENERIC_BUG
#define BUG_TABLE \
. = ALIGN(8); \
__bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___bug_table) = .; \
*(__bug_table) \
VMLINUX_SYMBOL(__stop___bug_table) = .; \
}
#else
#define BUG_TABLE
#endif
#ifdef CONFIG_PM_TRACE
#define TRACEDATA \
. = ALIGN(4); \
.tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__tracedata_start) = .; \
*(.tracedata) \
VMLINUX_SYMBOL(__tracedata_end) = .; \
}
#else
#define TRACEDATA
#endif
#define NOTES \
.notes : AT(ADDR(.notes) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_notes) = .; \
*(.note.*) \
VMLINUX_SYMBOL(__stop_notes) = .; \
}
#define INITCALLS \
*(.initcallearly.init) \
VMLINUX_SYMBOL(__early_initcall_end) = .; \
*(.initcall0.init) \
*(.initcall0s.init) \
*(.initcall1.init) \
*(.initcall1s.init) \
*(.initcall2.init) \
*(.initcall2s.init) \
*(.initcall3.init) \
*(.initcall3s.init) \
*(.initcall4.init) \
*(.initcall4s.init) \
*(.initcall5.init) \
*(.initcall5s.init) \
*(.initcallrootfs.init) \
*(.initcall6.init) \
*(.initcall6s.init) \
*(.initcall7.init) \
*(.initcall7s.init)
#define PERCPU(align) \
. = ALIGN(align); \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
*(.data.percpu.page_aligned) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
} \
VMLINUX_SYMBOL(__per_cpu_end) = .;