2018-01-14 12:28:50 -07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
/*
|
|
|
|
* Arm Statistical Profiling Extensions (SPE) support
|
|
|
|
* Copyright (c) 2017-2018, Arm Ltd.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
|
|
|
|
#define INCLUDE__ARM_SPE_PKT_DECODER_H__
|
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
|
|
/*
 * Size limit for a packet description.
 * NOTE(review): presumably the buffer size expected by arm_spe_pkt_desc() -
 * confirm against callers.
 */
#define ARM_SPE_PKT_DESC_MAX		256

/*
 * Negative status codes. They are parenthesized so that they expand safely
 * inside any surrounding expression.
 * NOTE(review): presumably returned by arm_spe_get_packet() - confirm against
 * the decoder implementation.
 */
#define ARM_SPE_NEED_MORE_BYTES		(-1)
#define ARM_SPE_BAD_PACKET		(-2)
|
|
|
|
|
perf arm-spe: Support synthetic events
Since commit ffd3d18c20b8 ("perf tools: Add ARM Statistical
Profiling Extensions (SPE) support") was merged, perf can output raw
SPE data with the option "--dump-raw-trace". However, it lacks support
for synthetic events, so it cannot output any statistical info.
This patch improves the "perf report" support for ARM SPE by adding four
types of synthetic events:
First level cache synthetic events, including L1 data cache accessing
and missing events;
Last level cache synthetic events, including last level cache
accessing and missing events;
TLB synthetic events, including TLB accessing and missing events;
Remote access events, which is used to account load/store operations
caused to another socket.
Example usage:
$ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
$ perf report --stdio
# Samples: 59 of event 'l1d-miss'
# Event count (approx.): 59
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages
5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap
5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg
5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range
3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge
3.39% 3.39% dd [kernel.kallsyms] [k] release_pages
3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c
1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd
[...]
# Samples: 3K of event 'l1d-access'
# Event count (approx.): 3980
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user
10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify
7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read
4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read
4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write
3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light
3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area
3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission
2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent
2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write
2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero
2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero
1.81% 1.81% dd dd [.] 0x0000000000002960
1.78% 1.78% dd dd [.] 0x0000000000002980
[...]
# Samples: 35 of event 'llc-miss'
# Event count (approx.): 35
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages
8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg
8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range
5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge
5.71% 5.71% dd [kernel.kallsyms] [k] release_pages
5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c
2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work
2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup
2.86% 2.86% dd [kernel.kallsyms] [k] copy_page
[...]
# Samples: 2 of event 'llc-access'
# Event count (approx.): 2
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .............
#
50.00% 50.00% dd [kernel.kallsyms] [k] copy_page
50.00% 50.00% dd libc-2.28.so [.] _dl_addr
# Samples: 48 of event 'tlb-miss'
# Event count (approx.): 48
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user
10.42% 10.42% dd [kernel.kallsyms] [k] clear_page
4.17% 4.17% dd [kernel.kallsyms] [k] copy_page
4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages
2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd
2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70
2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work
2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock
2.08% 2.08% dd [kernel.kallsyms] [k] d_path
2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode
2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open
[...]
# Samples: 9K of event 'tlb-access'
# Event count (approx.): 9573
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user
11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user
8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify
4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read
3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2
3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent
2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write
2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read
2.52% 2.52% dd libc-2.28.so [.] write
2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission
2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write
1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area
1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero
[...]
# Samples: 9 of event 'branch-miss'
# Event count (approx.): 9
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .........................
#
22.22% 22.22% dd libc-2.28.so [.] _dl_addr
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user
11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill
11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy
11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c
11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980
11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340
# Samples: 29 of event 'remote-access'
# Event count (approx.): 29
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages
10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg
10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range
6.90% 6.90% dd [kernel.kallsyms] [k] release_pages
3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge
3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work
3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge
3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] xas_start
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c
3.45% 3.45% dd ld-2.28.so [.] 0x00000000000093cc
Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-05-30 05:24:42 -07:00
|
|
|
/* NOTE(review): presumably the largest encoded packet size in bytes - confirm against the decoder */
#define ARM_SPE_PKT_MAX_SZ 16
|
|
|
|
|
2018-01-14 12:28:50 -07:00
|
|
|
/*
 * Packet types stored in struct arm_spe_pkt. The enumerators have no
 * explicit values, so they number consecutively from 0 (ARM_SPE_BAD) in
 * declaration order.
 */
enum arm_spe_pkt_type {
	ARM_SPE_BAD,
	ARM_SPE_PAD,
	ARM_SPE_END,
	ARM_SPE_TIMESTAMP,
	ARM_SPE_ADDRESS,
	ARM_SPE_COUNTER,
	ARM_SPE_CONTEXT,
	ARM_SPE_OP_TYPE,
	ARM_SPE_EVENTS,
	ARM_SPE_DATA_SOURCE,
};
|
|
|
|
|
|
|
|
struct arm_spe_pkt {
|
|
|
|
enum arm_spe_pkt_type type;
|
|
|
|
unsigned char index;
|
|
|
|
uint64_t payload;
|
|
|
|
};
|
|
|
|
|
2020-11-19 08:24:27 -07:00
|
|
|
/* Short header (HEADER0) and extended header (HEADER1) */
#define SPE_HEADER0_PAD			0x0
#define SPE_HEADER0_END			0x1
#define SPE_HEADER0_TIMESTAMP		0x71
/* Mask for event & data source: header bits [7:6] and [3:0] */
#define SPE_HEADER0_MASK1		(GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0))
#define SPE_HEADER0_EVENTS		0x42
#define SPE_HEADER0_SOURCE		0x43
/* Mask for context & operation: header bits [7:2] */
#define SPE_HEADER0_MASK2		GENMASK_ULL(7, 2)
#define SPE_HEADER0_CONTEXT		0x64
#define SPE_HEADER0_OP_TYPE		0x48
/* Mask for extended format */
#define SPE_HEADER0_EXTENDED		0x20
/* Mask for address & counter: header bits [7:3] */
#define SPE_HEADER0_MASK3		GENMASK_ULL(7, 3)
#define SPE_HEADER0_ADDRESS		0xb0
#define SPE_HEADER0_COUNTER		0x98
#define SPE_HEADER1_ALIGNMENT		0x0
|
|
|
|
|
2020-11-19 08:24:29 -07:00
|
|
|
/* Index carried by a short header: bits [2:0] of h */
#define SPE_HDR_SHORT_INDEX(h)		((h) & GENMASK_ULL(2, 0))
/*
 * Index carried by an extended header: bits [1:0] of h0 become index
 * bits [4:3], and bits [2:0] of h1 become index bits [2:0].
 */
#define SPE_HDR_EXTENDED_INDEX(h0, h1)	((((h0) & GENMASK_ULL(1, 0)) << 3) | \
					 SPE_HDR_SHORT_INDEX(h1))
|
|
|
|
|
|
|
|
/* Address packet header: values of the header index field */
#define SPE_ADDR_PKT_HDR_INDEX_INS		0x0
#define SPE_ADDR_PKT_HDR_INDEX_BRANCH		0x1
#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	0x2
#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS	0x3
#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH	0x4
|
2020-11-19 08:24:29 -07:00
|
|
|
|
|
|
|
/* Address packet payload */
#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT		56
/* Low seven bytes of the payload: bits [55:0] */
#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v)	((v) & GENMASK_ULL(55, 0))
/* Byte 6 of the payload: bits [55:48] */
#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v)		(((v) & GENMASK_ULL(55, 48)) >> 48)

/*
 * Fields in the top byte of the payload: NS is bit 63, EL is bits [62:61],
 * CH is bit 62 and PAT is bits [59:56].
 * NOTE(review): EL and CH overlap at bit 62, so they presumably apply to
 * different kinds of address packet - confirm against the decoder.
 */
#define SPE_ADDR_PKT_GET_NS(v)			(((v) & BIT_ULL(63)) >> 63)
#define SPE_ADDR_PKT_GET_EL(v)			(((v) & GENMASK_ULL(62, 61)) >> 61)
#define SPE_ADDR_PKT_GET_CH(v)			(((v) & BIT_ULL(62)) >> 62)
#define SPE_ADDR_PKT_GET_PAT(v)			(((v) & GENMASK_ULL(59, 56)) >> 56)

/* Values of the EL field extracted by SPE_ADDR_PKT_GET_EL() */
#define SPE_ADDR_PKT_EL0			0
#define SPE_ADDR_PKT_EL1			1
#define SPE_ADDR_PKT_EL2			2
#define SPE_ADDR_PKT_EL3			3
|
perf arm-spe: Support synthetic events
Since commit ffd3d18c20b8 ("perf tools: Add ARM Statistical
Profiling Extensions (SPE) support") was merged, perf can output raw
SPE data with the option "--dump-raw-trace". However, it lacks support
for synthetic events, so it cannot output any statistical info.
This patch improves the "perf report" support for ARM SPE by adding four
types of synthetic events:
First level cache synthetic events, including L1 data cache accessing
and missing events;
Last level cache synthetic events, including last level cache
accessing and missing events;
TLB synthetic events, including TLB accessing and missing events;
Remote access events, which is used to account load/store operations
caused to another socket.
Example usage:
$ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
$ perf report --stdio
# Samples: 59 of event 'l1d-miss'
# Event count (approx.): 59
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages
5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap
5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg
5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range
3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge
3.39% 3.39% dd [kernel.kallsyms] [k] release_pages
3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c
1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd
[...]
# Samples: 3K of event 'l1d-access'
# Event count (approx.): 3980
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user
10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify
7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read
4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read
4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write
3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light
3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area
3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission
2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent
2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write
2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero
2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero
1.81% 1.81% dd dd [.] 0x0000000000002960
1.78% 1.78% dd dd [.] 0x0000000000002980
[...]
# Samples: 35 of event 'llc-miss'
# Event count (approx.): 35
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages
8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg
8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range
5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge
5.71% 5.71% dd [kernel.kallsyms] [k] release_pages
5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c
2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work
2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup
2.86% 2.86% dd [kernel.kallsyms] [k] copy_page
[...]
# Samples: 2 of event 'llc-access'
# Event count (approx.): 2
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .............
#
50.00% 50.00% dd [kernel.kallsyms] [k] copy_page
50.00% 50.00% dd libc-2.28.so [.] _dl_addr
# Samples: 48 of event 'tlb-miss'
# Event count (approx.): 48
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user
10.42% 10.42% dd [kernel.kallsyms] [k] clear_page
4.17% 4.17% dd [kernel.kallsyms] [k] copy_page
4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages
2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd
2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70
2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work
2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock
2.08% 2.08% dd [kernel.kallsyms] [k] d_path
2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode
2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open
[...]
# Samples: 9K of event 'tlb-access'
# Event count (approx.): 9573
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user
11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user
8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify
4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read
3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2
3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent
2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write
2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read
2.52% 2.52% dd libc-2.28.so [.] write
2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission
2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write
1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area
1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero
[...]
# Samples: 9 of event 'branch-miss'
# Event count (approx.): 9
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .........................
#
22.22% 22.22% dd libc-2.28.so [.] _dl_addr
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user
11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill
11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy
11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c
11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980
11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340
# Samples: 29 of event 'remote-access'
# Event count (approx.): 29
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages
10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg
10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range
6.90% 6.90% dd [kernel.kallsyms] [k] release_pages
3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge
3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work
3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge
3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] xas_start
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c
3.45% 3.45% dd ld-2.28.so [.] 0x00000000000093cc
Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-05-30 05:24:42 -07:00
|
|
|
|
2020-11-19 08:24:31 -07:00
|
|
|
/* Context packet header: the index is bits [1:0] of h */
#define SPE_CTX_PKT_HDR_INDEX(h)		((h) & GENMASK_ULL(1, 0))
|
|
|
|
|
2020-11-19 08:24:33 -07:00
|
|
|
/* Counter packet header: values of the header index field */
#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT		0x0
#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT		0x1
#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT		0x2
|
|
|
|
|
2020-11-19 08:24:35 -07:00
|
|
|
/*
 * Event packet payload.
 *
 * NOTE(review): the values look like bit positions within the events
 * payload (note the gap between 11 and 17) - confirm against the decoder
 * before relying on that.
 */
enum arm_spe_events {
	EV_EXCEPTION_GEN	= 0,
	EV_RETIRED		= 1,
	EV_L1D_ACCESS		= 2,
	EV_L1D_REFILL		= 3,
	EV_TLB_ACCESS		= 4,
	EV_TLB_WALK		= 5,
	EV_NOT_TAKEN		= 6,
	EV_MISPRED		= 7,
	EV_LLC_ACCESS		= 8,
	EV_LLC_MISS		= 9,
	EV_REMOTE_ACCESS	= 10,
	EV_ALIGNMENT		= 11,
	EV_PARTIAL_PREDICATE	= 17,
	EV_EMPTY_PREDICATE	= 18,
};
|
|
|
|
|
2020-11-19 08:24:38 -07:00
|
|
|
/* Operation packet header: the class is bits [1:0] of h */
#define SPE_OP_PKT_HDR_CLASS(h)			((h) & GENMASK_ULL(1, 0))
#define SPE_OP_PKT_HDR_CLASS_OTHER		0x0
#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC	0x1
#define SPE_OP_PKT_HDR_CLASS_BR_ERET		0x2

/* True when bit 3 of v is set and bits 7 and 0 are clear */
#define SPE_OP_PKT_IS_OTHER_SVE_OP(v)		(((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)

#define SPE_OP_PKT_COND				BIT(0)
|
|
|
|
|
|
|
|
/*
 * Load/store subclass: bits [7:1] of the operation payload (bit 0 is
 * cleared), presumably compared against the SPE_OP_PKT_LDST_SUBCLASS_*
 * values below.
 */
#define SPE_OP_PKT_LDST_SUBCLASS_GET(v)		((v) & GENMASK_ULL(7, 1))
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG		0x0
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP	0x4
#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG	0x10
#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG	0x30
#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG	0x14
#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY		0x20
/*
 * NOTE(review): 0x25 has bit 0 set, but SPE_OP_PKT_LDST_SUBCLASS_GET()
 * always clears bit 0, so its result can never equal this value. The
 * constant is kept unchanged for existing users; test a payload with
 * SPE_OP_PKT_IS_LDST_MEMSET() instead, which compares all of bits [7:0].
 */
#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET		0x25
#define SPE_OP_PKT_IS_LDST_MEMSET(v)		(((v) & GENMASK_ULL(7, 0)) == \
						 SPE_OP_PKT_LDST_SUBCLASS_MEMSET)

/* True when bit 1 of v is set and bits [7:5] are clear */
#define SPE_OP_PKT_IS_LDST_ATOMIC(v)		(((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)

#define SPE_OP_PKT_AR				BIT(4)
#define SPE_OP_PKT_EXCL				BIT(3)
#define SPE_OP_PKT_AT				BIT(2)
#define SPE_OP_PKT_ST				BIT(0)
|
|
|
|
|
2020-11-19 08:24:41 -07:00
|
|
|
/* True when bit 3 of v is set and bit 1 is clear */
#define SPE_OP_PKT_IS_LDST_SVE(v)		(((v) & (BIT(3) | BIT(1))) == 0x8)

#define SPE_OP_PKT_SVE_SG			BIT(7)
/*
 * SVE effective vector length (EVL) is stored in byte 0 bits [6:4];
 * the length is rounded up to a power of two and uses 32 as one step,
 * so the EVL calculation is:
 *
 *	32 * (2 ^ bits [6:4]) = 32 << (bits [6:4])
 */
#define SPE_OP_PKG_SVE_EVL(v)			(32 << (((v) & GENMASK_ULL(6, 4)) >> 4))
/* Correctly spelled alias; the "PKG" spelling above is kept for existing users */
#define SPE_OP_PKT_SVE_EVL(v)			SPE_OP_PKG_SVE_EVL(v)
#define SPE_OP_PKT_SVE_PRED			BIT(2)
#define SPE_OP_PKT_SVE_FP			BIT(1)
|
|
|
|
|
2020-11-19 08:24:38 -07:00
|
|
|
/* True when bits [7:1] of v equal 0x2, i.e. bit 1 set and bits [7:2] clear; bit 0 is ignored */
#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2)
|
|
|
|
|
2018-01-14 12:28:50 -07:00
|
|
|
/*
 * Return a string for the given packet type.
 * NOTE(review): presumably a printable name; the result for values outside
 * enum arm_spe_pkt_type is not visible from this header.
 */
const char *arm_spe_pkt_name(enum arm_spe_pkt_type type);

/*
 * Read one packet from a byte buffer.
 *
 * @buf:    input bytes (not modified)
 * @len:    number of bytes available at @buf
 * @packet: output packet
 *
 * NOTE(review): presumably returns the number of bytes consumed on success,
 * or ARM_SPE_NEED_MORE_BYTES / ARM_SPE_BAD_PACKET on failure - confirm
 * against the implementation.
 */
int arm_spe_get_packet(const unsigned char *buf, size_t len,
		       struct arm_spe_pkt *packet);

/*
 * Write a description of @packet into @buf, which holds @len bytes.
 * NOTE(review): the meaning of the return value is not visible from this
 * header - confirm against the implementation.
 */
int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
|
|
|
|
#endif
|