
libbpf: Recognize __arena global variables.

LLVM automatically places __arena global variables into the ".arena.1" ELF
section. In order to use such global variables, the bpf program must include a
definition of the arena map in the ".maps" section, like:
struct {
       __uint(type, BPF_MAP_TYPE_ARENA);
       __uint(map_flags, BPF_F_MMAPABLE);
       __uint(max_entries, 1000);         /* number of pages */
       __ulong(map_extra, 2ull << 44);    /* start of mmap() region */
} arena SEC(".maps");

libbpf recognizes both uses of the arena and creates a single `struct bpf_map *`
instance in the libbpf APIs.
The ".arena.1" ELF section data is used as the initial data image, which is
exposed through the skeleton and bpf_map__initial_value() to the user in case
they need to tune it before the load phase. During the load phase, this initial
image is copied over into the mmap()'ed region corresponding to the arena and
then discarded.
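For example, a user-space sketch of such tuning (the "example" skeleton name
and the idea of zeroing the image are hypothetical, not part of this patch):

#include <string.h>
#include <bpf/libbpf.h>
#include "example.skel.h"	/* hypothetical skeleton header */

/* call after example__open() but before example__load() */
static int tune_arena_image(struct example *skel)
{
	size_t sz;
	void *image;

	/* returns the ".arena.1" data image; sz is the image size,
	 * which can be much smaller than the full arena
	 */
	image = bpf_map__initial_value(skel->maps.arena, &sz);
	if (!image)
		return -1;

	memset(image, 0, sz);	/* tweak __arena globals here */
	return 0;
}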

A few small checks here and there had to be added to make sure this approach
works with bpf_map__initial_value(), mostly due to the hard-coded assumption
that map->mmaped is set up with an mmap() syscall and should be munmap()'ed.
For the arena, .arena.1 can be (much) smaller than the maximum arena size, so
this smaller data size has to be tracked separately. Given it is enforced that
there is only one arena per bpf_object instance, we just keep it in a separate
field. This can be generalized later if necessary.

All global variables from ".arena.1" section are accessible from user space
via skel->arena->name_of_var.
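For example (a sketch; the "example" skeleton name and "counter" variable are
made up for illustration):

#include <stdio.h>
#include "example.skel.h"	/* hypothetical skeleton header */

int main(void)
{
	struct example *skel;

	skel = example__open_and_load();
	if (!skel)
		return 1;

	/* skel->arena points into the mmap()'ed arena region, so reads
	 * and writes are shared with the bpf program
	 */
	skel->arena->counter = 42;
	printf("counter = %d\n", skel->arena->counter);

	example__destroy(skel);
	return 0;
}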

For bss/data/rodata the skeleton/libbpf perform the following sequence:
1. addr = mmap(MAP_ANONYMOUS)
2. user space optionally modifies global vars
3. map_fd = bpf_create_map()
4. bpf_update_map_elem(map_fd, addr) // to store values into the kernel
5. mmap(addr, MAP_FIXED, map_fd)
After step 5 user space sees the values it wrote at step 2 at the same addresses.
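The same sequence as a rough user-space sketch (illustrative only, not
libbpf's actual implementation; the map name and sizes are arbitrary):

#include <sys/mman.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int global_data_sketch(size_t sz)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
	__u32 zero = 0;
	void *addr;
	int map_fd;

	/* 1. anonymous mapping exposed to user space via the skeleton */
	addr = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	/* 2. ... user space optionally modifies global vars through addr ... */

	/* 3. create the backing ARRAY map */
	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "global_data", 4, sz, 1, &opts);

	/* 4. store the current values into the kernel */
	bpf_map_update_elem(map_fd, &zero, addr, 0);

	/* 5. remap the same address range on top of the map's memory */
	mmap(addr, sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, map_fd, 0);
	return map_fd;
}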

arena doesn't support update_map_elem. Hence skeleton/libbpf do (sketched below):
1. addr = malloc(sizeof SEC ".arena.1")
2. user space optionally modifies global vars
3. map_fd = bpf_create_map(MAP_TYPE_ARENA)
4. real_addr = mmap(map->map_extra, MAP_SHARED | MAP_FIXED, map_fd)
5. memcpy(real_addr, addr) // this will fault-in and allocate pages
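Roughly, as a user-space sketch (again illustrative; libbpf does this
internally during load, and the constants are just examples):

#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int arena_sketch(const void *arena1_image, size_t image_sz, __u32 nr_pages)
{
	const __u64 user_vm_start = 2ull << 44;	/* same value as map_extra above */
	const size_t page_sz = 4096;		/* assuming 4K pages */
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		    .map_flags = BPF_F_MMAPABLE,
		    .map_extra = user_vm_start);
	void *tmp, *real_addr;
	int map_fd;

	/* 1. temporary copy of the ".arena.1" data image */
	tmp = malloc(image_sz);
	memcpy(tmp, arena1_image, image_sz);

	/* 2. ... user space optionally modifies global vars in tmp ... */

	/* 3. ARENA maps take no key/value; max_entries is the page count */
	map_fd = bpf_map_create(BPF_MAP_TYPE_ARENA, "arena", 0, 0, nr_pages, &opts);

	/* 4. map the arena at the address requested via map_extra */
	real_addr = mmap((void *)(unsigned long)user_vm_start, (size_t)nr_pages * page_sz,
			 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, map_fd, 0);

	/* 5. copying the image in faults in and allocates arena pages */
	memcpy(real_addr, tmp, image_sz);
	free(tmp);
	return map_fd;
}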

In the end, the look and feel of global data vs __arena global data is the same
from the bpf program's point of view.

Another complication is:
struct {
  __uint(type, BPF_MAP_TYPE_ARENA);
} arena SEC(".maps");

int __arena foo;
int bar;

  ptr1 = &foo;   // relocation against ".arena.1" section
  ptr2 = &arena; // relocation against ".maps" section
  ptr3 = &bar;   // relocation against ".bss" section

For the kernel, ptr1 and ptr2 point to the same arena's map_fd,
while ptr3 points to a different global array's map_fd.
For the verifier:
ptr1->type == unknown_scalar
ptr2->type == const_ptr_to_map
ptr3->type == ptr_to_map_value

After verification, from the JIT's pov all 3 pointers are normal ld_imm64 insns.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20240308010812.89848-11-alexei.starovoitov@gmail.com
Author: Andrii Nakryiko
Date:   2024-03-07 17:08:08 -08:00
Commit: 2e7ba4f8fd (parent eed512e8ac)

3 changed files with 120 additions and 13 deletions

tools/bpf/bpftool/gen.c

@@ -120,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
 	static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
 	int i, n;
 
+	/* recognize hard coded LLVM section name */
+	if (strcmp(sec_name, ".arena.1") == 0) {
+		/* this is the name to use in skeleton */
+		snprintf(buf, buf_sz, "arena");
+		return true;
+	}
+
 	for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) {
 		const char *pfx = pfxs[i];
@@ -250,6 +256,13 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map
 static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
 {
+	size_t tmp_sz;
+
+	if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) {
+		snprintf(buf, sz, "arena");
+		return true;
+	}
+
 	if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
 		return false;

tools/lib/bpf/libbpf.c

@@ -498,6 +498,7 @@ struct bpf_struct_ops {
 #define KSYMS_SEC ".ksyms"
 #define STRUCT_OPS_SEC ".struct_ops"
 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
+#define ARENA_SEC ".arena.1"
 
 enum libbpf_map_type {
 	LIBBPF_MAP_UNSPEC,
@@ -629,6 +630,7 @@ struct elf_state {
 	Elf *elf;
 	Elf64_Ehdr *ehdr;
 	Elf_Data *symbols;
+	Elf_Data *arena_data;
 	size_t shstrndx; /* section index for section name strings */
 	size_t strtabidx;
 	struct elf_sec_desc *secs;
@@ -638,6 +640,7 @@ struct elf_state {
 	int text_shndx;
 	int symbols_shndx;
 	bool has_st_ops;
+	int arena_data_shndx;
 };
 
 struct usdt_manager;
@@ -697,6 +700,10 @@ struct bpf_object {
 	struct usdt_manager *usdt_man;
 
+	struct bpf_map *arena_map;
+	void *arena_data;
+	size_t arena_data_sz;
+
 	struct kern_feature_cache *feat_cache;
 	char *token_path;
 	int token_fd;
@@ -1443,6 +1450,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
 	elf_end(obj->efile.elf);
 	obj->efile.elf = NULL;
 	obj->efile.symbols = NULL;
+	obj->efile.arena_data = NULL;
 
 	zfree(&obj->efile.secs);
 	obj->efile.sec_cnt = 0;
@@ -1851,7 +1859,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	def->value_size = data_sz;
 	def->max_entries = 1;
 	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
-			 ? BPF_F_RDONLY_PROG : 0;
+		? BPF_F_RDONLY_PROG : 0;
 
 	/* failures are fine because of maps like .rodata.str1.1 */
 	(void) map_fill_btf_type_info(obj, map);
@@ -2843,6 +2851,32 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 	return 0;
 }
 
+static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
+			       const char *sec_name, int sec_idx,
+			       void *data, size_t data_sz)
+{
+	const long page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t mmap_sz;
+
+	mmap_sz = bpf_map_mmap_sz(obj->arena_map);
+	if (roundup(data_sz, page_sz) > mmap_sz) {
+		pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
+			sec_name, mmap_sz, data_sz);
+		return -E2BIG;
+	}
+
+	obj->arena_data = malloc(data_sz);
+	if (!obj->arena_data)
+		return -ENOMEM;
+	memcpy(obj->arena_data, data, data_sz);
+	obj->arena_data_sz = data_sz;
+
+	/* make bpf_map__init_value() work for ARENA maps */
+	map->mmaped = obj->arena_data;
+
+	return 0;
+}
+
 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 					  const char *pin_root_path)
 {
@@ -2892,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 			return err;
 	}
 
+	for (i = 0; i < obj->nr_maps; i++) {
+		struct bpf_map *map = &obj->maps[i];
+
+		if (map->def.type != BPF_MAP_TYPE_ARENA)
+			continue;
+
+		if (obj->arena_map) {
+			pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
+				map->name, obj->arena_map->name);
+			return -EINVAL;
+		}
+		obj->arena_map = map;
+
+		if (obj->efile.arena_data) {
+			err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
+						  obj->efile.arena_data->d_buf,
+						  obj->efile.arena_data->d_size);
+			if (err)
+				return err;
+		}
+	}
+	if (obj->efile.arena_data && !obj->arena_map) {
+		pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
+			ARENA_SEC);
+		return -ENOENT;
+	}
+
 	return 0;
 }
@@ -3771,6 +3832,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			sec_desc->shdr = sh;
 			sec_desc->data = data;
 			obj->efile.has_st_ops = true;
+		} else if (strcmp(name, ARENA_SEC) == 0) {
+			obj->efile.arena_data = data;
+			obj->efile.arena_data_shndx = idx;
 		} else {
 			pr_info("elf: skipping unrecognized data section(%d) %s\n",
 				idx, name);
@@ -4400,6 +4464,15 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
 	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 
+	/* arena data relocation */
+	if (shdr_idx == obj->efile.arena_data_shndx) {
+		reloc_desc->type = RELO_DATA;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->map_idx = obj->arena_map - obj->maps;
+		reloc_desc->sym_off = sym->st_value;
+		return 0;
+	}
+
 	/* generic map reference relocation */
 	if (type == LIBBPF_MAP_UNSPEC) {
 		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -4940,6 +5013,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 		bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
 		return 0;
 	}
+
 	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
 	if (err) {
 		err = -errno;
@@ -5289,6 +5363,10 @@ retry:
 					map->name, err);
 				return err;
 			}
+			if (obj->arena_data) {
+				memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
+				zfree(&obj->arena_data);
+			}
 		}
 
 		if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
 			err = init_map_in_map_slots(obj, map);
@@ -8786,13 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map)
 	zfree(&map->init_slots);
 	map->init_slots_sz = 0;
 
-	if (map->mmaped) {
-		size_t mmap_sz;
-
-		mmap_sz = bpf_map_mmap_sz(map);
-		munmap(map->mmaped, mmap_sz);
-		map->mmaped = NULL;
-	}
+	if (map->mmaped && map->mmaped != map->obj->arena_data)
+		munmap(map->mmaped, bpf_map_mmap_sz(map));
+	map->mmaped = NULL;
 
 	if (map->st_ops) {
 		zfree(&map->st_ops->data);
@@ -8852,6 +8926,8 @@ void bpf_object__close(struct bpf_object *obj)
 	if (obj->token_fd > 0)
 		close(obj->token_fd);
 
+	zfree(&obj->arena_data);
+
 	free(obj);
 }
@@ -10063,18 +10139,26 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
 int bpf_map__set_initial_value(struct bpf_map *map,
 			       const void *data, size_t size)
 {
+	size_t actual_sz;
+
 	if (map->obj->loaded || map->reused)
 		return libbpf_err(-EBUSY);
 
-	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
-	    size != map->def.value_size)
+	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+		return libbpf_err(-EINVAL);
+
+	if (map->def.type == BPF_MAP_TYPE_ARENA)
+		actual_sz = map->obj->arena_data_sz;
+	else
+		actual_sz = map->def.value_size;
+	if (size != actual_sz)
 		return libbpf_err(-EINVAL);
 
 	memcpy(map->mmaped, data, size);
 	return 0;
 }
 
-void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
 {
 	if (bpf_map__is_struct_ops(map)) {
 		if (psize)
@@ -10084,7 +10168,12 @@ void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
 	if (!map->mmaped)
 		return NULL;
 
-	*psize = map->def.value_size;
+	if (map->def.type == BPF_MAP_TYPE_ARENA)
+		*psize = map->obj->arena_data_sz;
+	else
+		*psize = map->def.value_size;
+
 	return map->mmaped;
 }
@@ -13573,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 			continue;
 		}
 
+		if (map->def.type == BPF_MAP_TYPE_ARENA) {
+			*mmaped = map->mmaped;
+			continue;
+		}
+
 		if (map->def.map_flags & BPF_F_RDONLY_PROG)
 			prot = PROT_READ;
 		else

tools/lib/bpf/libbpf.h

@@ -1014,7 +1014,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
 					  const void *data, size_t size);
-LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize);
 
 /**
  * @brief **bpf_map__is_internal()** tells the caller whether or not the