perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless annotation in non-distro builds. (It is also much faster than using libbfd or bfd objdump on binaries with a lot of debug information.) The output is nearly identical to that of llvm-objdump; there are some very rare whitespace differences, some minor changes to demangling (since we use perf's regular demangling and not LLVM's own) and the occasional case where llvm-objdump makes a different choice when multiple symbols share the same address. It should work across all of LLVM's supported architectures, although I've only tested 64-bit x86, and finding the right target triple from perf's idea of machine architecture can sometimes be a bit tricky. Ideally, we should have some way of finding the triple just from the file itself. Committer notes: Address this build failure on 32-bit systems by using PRIx64 from inttypes.h: 3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC) util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’: util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka ‘long long unsigned int’} [-Werror=format=] 150 | snprintf(buf, sizeof(buf), "%s+0x%lx", | ~~^ | | | long unsigned int | %llx 151 | demangled ? demangled : sym_name, addr - base_addr); | ~~~~~~~~~~~~~~~~ | | | u64 {aka long long unsigned int} cc1plus: all warnings being treated as errors Signed-off-by: Steinar H. Gunderson <sesse@google.com> Cc: Ian Rogers <irogers@google.com> Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
6eca7c5ac2
commit
0488568178
@ -48,6 +48,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
|
||||
static void ins__sort(struct arch *arch);
|
||||
static int disasm_line__parse(char *line, const char **namep, char **rawp);
|
||||
static int disasm_line__parse_powerpc(struct disasm_line *dl);
|
||||
static char *expand_tabs(char *line, char **storage, size_t *storage_len);
|
||||
|
||||
static __attribute__((constructor)) void symbol__init_regexpr(void)
|
||||
{
|
||||
@ -1354,7 +1355,9 @@ static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_LIBCAPSTONE_SUPPORT) || defined(HAVE_LIBLLVM_SUPPORT)
|
||||
struct find_file_offset_data {
|
||||
u64 ip;
|
||||
u64 offset;
|
||||
@ -1418,7 +1421,9 @@ err:
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBCAPSTONE_SUPPORT
|
||||
static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
|
||||
struct annotate_args *args, u64 addr)
|
||||
{
|
||||
@ -1805,6 +1810,189 @@ err:
|
||||
count = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIBLLVM_SUPPORT
|
||||
#include <llvm-c/Disassembler.h>
|
||||
#include <llvm-c/Target.h>
|
||||
#include "util/llvm-c-helpers.h"
|
||||
|
||||
struct symbol_lookup_storage {
|
||||
u64 branch_addr;
|
||||
u64 pcrel_load_addr;
|
||||
};
|
||||
|
||||
/*
|
||||
* Whenever LLVM wants to resolve an address into a symbol, it calls this
|
||||
* callback. We don't ever actually _return_ anything (in particular, because
|
||||
* it puts quotation marks around what we return), but we use this as a hint
|
||||
* that there is a branch or PC-relative address in the expression that we
|
||||
* should add some textual annotation for after the instruction. The caller
|
||||
* will use this information to add the actual annotation.
|
||||
*/
|
||||
static const char *
|
||||
symbol_lookup_callback(void *disinfo, uint64_t value,
|
||||
uint64_t *ref_type,
|
||||
uint64_t address __maybe_unused,
|
||||
const char **ref __maybe_unused)
|
||||
{
|
||||
struct symbol_lookup_storage *storage = disinfo;
|
||||
|
||||
if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch)
|
||||
storage->branch_addr = value;
|
||||
else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load)
|
||||
storage->pcrel_load_addr = value;
|
||||
*ref_type = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
|
||||
struct annotate_args *args)
|
||||
{
|
||||
struct annotation *notes = symbol__annotation(sym);
|
||||
struct map *map = args->ms.map;
|
||||
struct dso *dso = map__dso(map);
|
||||
u64 start = map__rip_2objdump(map, sym->start);
|
||||
u8 *buf;
|
||||
u64 len;
|
||||
u64 pc;
|
||||
bool is_64bit;
|
||||
char triplet[64];
|
||||
char disasm_buf[2048];
|
||||
size_t disasm_len;
|
||||
struct disasm_line *dl;
|
||||
LLVMDisasmContextRef disasm = NULL;
|
||||
struct symbol_lookup_storage storage;
|
||||
char *line_storage = NULL;
|
||||
size_t line_storage_len = 0;
|
||||
int ret = -1;
|
||||
|
||||
if (args->options->objdump_path)
|
||||
return -1;
|
||||
|
||||
LLVMInitializeAllTargetInfos();
|
||||
LLVMInitializeAllTargetMCs();
|
||||
LLVMInitializeAllDisassemblers();
|
||||
|
||||
buf = read_symbol(filename, map, sym, &len, &is_64bit);
|
||||
if (buf == NULL)
|
||||
return -1;
|
||||
|
||||
if (arch__is(args->arch, "x86")) {
|
||||
if (is_64bit)
|
||||
scnprintf(triplet, sizeof(triplet), "x86_64-pc-linux");
|
||||
else
|
||||
scnprintf(triplet, sizeof(triplet), "i686-pc-linux");
|
||||
} else {
|
||||
scnprintf(triplet, sizeof(triplet), "%s-linux-gnu",
|
||||
args->arch->name);
|
||||
}
|
||||
|
||||
disasm = LLVMCreateDisasm(triplet, &storage, 0, NULL,
|
||||
symbol_lookup_callback);
|
||||
if (disasm == NULL)
|
||||
goto err;
|
||||
|
||||
if (args->options->disassembler_style &&
|
||||
!strcmp(args->options->disassembler_style, "intel"))
|
||||
LLVMSetDisasmOptions(disasm,
|
||||
LLVMDisassembler_Option_AsmPrinterVariant);
|
||||
|
||||
/*
|
||||
* This needs to be set after AsmPrinterVariant, due to a bug in LLVM;
|
||||
* setting AsmPrinterVariant makes a new instruction printer, making it
|
||||
* forget about the PrintImmHex flag (which is applied before if both
|
||||
* are given to the same call).
|
||||
*/
|
||||
LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
|
||||
|
||||
/* add the function address and name */
|
||||
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
|
||||
start, sym->name);
|
||||
|
||||
args->offset = -1;
|
||||
args->line = disasm_buf;
|
||||
args->line_nr = 0;
|
||||
args->fileloc = NULL;
|
||||
args->ms.sym = sym;
|
||||
|
||||
dl = disasm_line__new(args);
|
||||
if (dl == NULL)
|
||||
goto err;
|
||||
|
||||
annotation_line__add(&dl->al, ¬es->src->source);
|
||||
|
||||
pc = start;
|
||||
for (u64 offset = 0; offset < len; ) {
|
||||
unsigned int ins_len;
|
||||
|
||||
storage.branch_addr = 0;
|
||||
storage.pcrel_load_addr = 0;
|
||||
|
||||
ins_len = LLVMDisasmInstruction(disasm, buf + offset,
|
||||
len - offset, pc,
|
||||
disasm_buf, sizeof(disasm_buf));
|
||||
if (ins_len == 0)
|
||||
goto err;
|
||||
disasm_len = strlen(disasm_buf);
|
||||
|
||||
if (storage.branch_addr != 0) {
|
||||
char *name = llvm_name_for_code(dso, filename,
|
||||
storage.branch_addr);
|
||||
if (name != NULL) {
|
||||
disasm_len += scnprintf(disasm_buf + disasm_len,
|
||||
sizeof(disasm_buf) -
|
||||
disasm_len,
|
||||
" <%s>", name);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
if (storage.pcrel_load_addr != 0) {
|
||||
char *name = llvm_name_for_data(dso, filename,
|
||||
storage.pcrel_load_addr);
|
||||
disasm_len += scnprintf(disasm_buf + disasm_len,
|
||||
sizeof(disasm_buf) - disasm_len,
|
||||
" # %#"PRIx64,
|
||||
storage.pcrel_load_addr);
|
||||
if (name) {
|
||||
disasm_len += scnprintf(disasm_buf + disasm_len,
|
||||
sizeof(disasm_buf) -
|
||||
disasm_len,
|
||||
" <%s>", name);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
|
||||
args->offset = offset;
|
||||
args->line = expand_tabs(disasm_buf, &line_storage,
|
||||
&line_storage_len);
|
||||
args->line_nr = 0;
|
||||
args->fileloc = NULL;
|
||||
args->ms.sym = sym;
|
||||
|
||||
llvm_addr2line(filename, pc, &args->fileloc,
|
||||
(unsigned int *)&args->line_nr, false, NULL);
|
||||
|
||||
dl = disasm_line__new(args);
|
||||
if (dl == NULL)
|
||||
goto err;
|
||||
|
||||
annotation_line__add(&dl->al, ¬es->src->source);
|
||||
|
||||
free(args->fileloc);
|
||||
pc += ins_len;
|
||||
offset += ins_len;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
err:
|
||||
LLVMDisasmDispose(disasm);
|
||||
free(buf);
|
||||
free(line_storage);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Possibly create a new version of line with tabs expanded. Returns the
|
||||
* existing or new line, storage is updated if a new line is allocated. If
|
||||
@ -1951,6 +2139,11 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIBLLVM_SUPPORT
|
||||
err = symbol__disassemble_llvm(symfs_filename, sym, args);
|
||||
if (err == 0)
|
||||
goto out_remove_tmp;
|
||||
#endif
|
||||
#ifdef HAVE_LIBCAPSTONE_SUPPORT
|
||||
err = symbol__disassemble_capstone(symfs_filename, sym, args);
|
||||
if (err == 0)
|
||||
|
@ -8,8 +8,10 @@
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter" /* Needed for LLVM <= 15 */
|
||||
#include <llvm/DebugInfo/Symbolize/Symbolize.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <linux/compiler.h>
|
||||
@ -19,6 +21,9 @@ extern "C" {
|
||||
#include "symbol_conf.h"
|
||||
#include "llvm-c-helpers.h"
|
||||
|
||||
extern "C"
|
||||
char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
|
||||
|
||||
using namespace llvm;
|
||||
using llvm::symbolize::LLVMSymbolizer;
|
||||
|
||||
@ -132,3 +137,61 @@ int llvm_addr2line(const char *dso_name, u64 addr,
|
||||
return extract_file_and_line(*res_or_err, file, line);
|
||||
}
|
||||
}
|
||||
|
||||
static char *
|
||||
make_symbol_relative_string(struct dso *dso, const char *sym_name,
|
||||
u64 addr, u64 base_addr)
|
||||
{
|
||||
if (!strcmp(sym_name, "<invalid>"))
|
||||
return NULL;
|
||||
|
||||
char *demangled = dso__demangle_sym(dso, 0, sym_name);
|
||||
if (base_addr && base_addr != addr) {
|
||||
char buf[256];
|
||||
snprintf(buf, sizeof(buf), "%s+0x%" PRIx64,
|
||||
demangled ? demangled : sym_name, addr - base_addr);
|
||||
free(demangled);
|
||||
return strdup(buf);
|
||||
} else {
|
||||
if (demangled)
|
||||
return demangled;
|
||||
else
|
||||
return strdup(sym_name);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr)
|
||||
{
|
||||
LLVMSymbolizer *symbolizer = get_symbolizer();
|
||||
object::SectionedAddress sectioned_addr = {
|
||||
addr,
|
||||
object::SectionedAddress::UndefSection
|
||||
};
|
||||
Expected<DILineInfo> res_or_err =
|
||||
symbolizer->symbolizeCode(dso_name, sectioned_addr);
|
||||
if (!res_or_err) {
|
||||
return NULL;
|
||||
}
|
||||
return make_symbol_relative_string(
|
||||
dso, res_or_err->FunctionName.c_str(),
|
||||
addr, res_or_err->StartAddress ? *res_or_err->StartAddress : 0);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr)
|
||||
{
|
||||
LLVMSymbolizer *symbolizer = get_symbolizer();
|
||||
object::SectionedAddress sectioned_addr = {
|
||||
addr,
|
||||
object::SectionedAddress::UndefSection
|
||||
};
|
||||
Expected<DIGlobal> res_or_err =
|
||||
symbolizer->symbolizeData(dso_name, sectioned_addr);
|
||||
if (!res_or_err) {
|
||||
return NULL;
|
||||
}
|
||||
return make_symbol_relative_string(
|
||||
dso, res_or_err->Name.c_str(),
|
||||
addr, res_or_err->Start);
|
||||
}
|
||||
|
@ -13,6 +13,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dso;
|
||||
|
||||
struct llvm_a2l_frame {
|
||||
char* filename;
|
||||
char* funcname;
|
||||
@ -42,6 +44,15 @@ int llvm_addr2line(const char* dso_name,
|
||||
bool unwind_inlines,
|
||||
struct llvm_a2l_frame** inline_frames);
|
||||
|
||||
/*
|
||||
* Simple symbolizers for addresses; will convert something like
|
||||
* 0x12345 to "func+0x123". Will return NULL if no symbol was found.
|
||||
*
|
||||
* The returned value must be freed by the caller, with free().
|
||||
*/
|
||||
char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr);
|
||||
char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user