1

mnt_idmapping: decouple from namespaces

There's no reason we need to couple mnt idmapping to namespaces in the
way we currently do. Copy the idmapping when an idmapped mount is
created and don't take any reference on the namespace at all.

We also can't easily refcount struct uid_gid_map because it needs to
stay the size of a cacheline otherwise we risk performance regressions
(Ignoring for a second that right now struct uid_gid_map isn't actually
 64 byte but 72 but that's a fix for another patch series.).

Link: https://lore.kernel.org/r/20231122-vfs-mnt_idmap-v1-3-dae4abdde5bd@kernel.org
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2023-11-22 13:44:39 +01:00
parent 90fbd8b175
commit 783822e445
3 changed files with 106 additions and 17 deletions

View File

@ -9,8 +9,16 @@
#include "internal.h"
/*
* Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
* never from raw values. These are just internal helpers.
*/
#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
struct mnt_idmap {
struct user_namespace *owner;
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
refcount_t count;
};
@ -20,7 +28,6 @@ struct mnt_idmap {
* mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
*/
struct mnt_idmap nop_mnt_idmap = {
.owner = &init_user_ns,
.count = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL_GPL(nop_mnt_idmap);
@ -65,7 +72,6 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
kuid_t kuid)
{
uid_t uid;
struct user_namespace *mnt_userns = idmap->owner;
if (idmap == &nop_mnt_idmap)
return VFSUIDT_INIT(kuid);
@ -75,7 +81,7 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
uid = from_kuid(fs_userns, kuid);
if (uid == (uid_t)-1)
return INVALID_VFSUID;
return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
}
EXPORT_SYMBOL_GPL(make_vfsuid);
@ -103,7 +109,6 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, kgid_t kgid)
{
gid_t gid;
struct user_namespace *mnt_userns = idmap->owner;
if (idmap == &nop_mnt_idmap)
return VFSGIDT_INIT(kgid);
@ -113,7 +118,7 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
gid = from_kgid(fs_userns, kgid);
if (gid == (gid_t)-1)
return INVALID_VFSGID;
return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
}
EXPORT_SYMBOL_GPL(make_vfsgid);
@ -132,11 +137,10 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, vfsuid_t vfsuid)
{
uid_t uid;
struct user_namespace *mnt_userns = idmap->owner;
if (idmap == &nop_mnt_idmap)
return AS_KUIDT(vfsuid);
uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
if (uid == (uid_t)-1)
return INVALID_UID;
if (initial_idmapping(fs_userns))
@ -160,11 +164,10 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, vfsgid_t vfsgid)
{
gid_t gid;
struct user_namespace *mnt_userns = idmap->owner;
if (idmap == &nop_mnt_idmap)
return AS_KGIDT(vfsgid);
gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
if (gid == (gid_t)-1)
return INVALID_GID;
if (initial_idmapping(fs_userns))
@ -195,16 +198,91 @@ int vfsgid_in_group_p(vfsgid_t vfsgid)
#endif
EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
static int copy_mnt_idmap(struct uid_gid_map *map_from,
struct uid_gid_map *map_to)
{
struct uid_gid_extent *forward, *reverse;
u32 nr_extents = READ_ONCE(map_from->nr_extents);
/* Pairs with smp_wmb() when writing the idmapping. */
smp_rmb();
/*
* Don't blindly copy @map_to into @map_from if nr_extents is
* smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we
* read @nr_extents someone could have written an idmapping and
* then we might end up with inconsistent data. So just don't do
* anything at all.
*/
if (nr_extents == 0)
return 0;
/*
* Here we know that nr_extents is greater than zero which means
* a map has been written. Since idmappings can't be changed
* once they have been written we know that we can safely copy
* from @map_to into @map_from.
*/
if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
*map_to = *map_from;
return 0;
}
forward = kmemdup(map_from->forward,
nr_extents * sizeof(struct uid_gid_extent),
GFP_KERNEL_ACCOUNT);
if (!forward)
return -ENOMEM;
reverse = kmemdup(map_from->reverse,
nr_extents * sizeof(struct uid_gid_extent),
GFP_KERNEL_ACCOUNT);
if (!reverse) {
kfree(forward);
return -ENOMEM;
}
/*
* The idmapping isn't exposed anywhere so we don't need to care
* about ordering between extent pointers and @nr_extents
* initialization.
*/
map_to->forward = forward;
map_to->reverse = reverse;
map_to->nr_extents = nr_extents;
return 0;
}
static void free_mnt_idmap(struct mnt_idmap *idmap)
{
if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
kfree(idmap->uid_map.forward);
kfree(idmap->uid_map.reverse);
}
if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
kfree(idmap->gid_map.forward);
kfree(idmap->gid_map.reverse);
}
kfree(idmap);
}
struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
{
struct mnt_idmap *idmap;
int ret;
idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
if (!idmap)
return ERR_PTR(-ENOMEM);
idmap->owner = get_user_ns(mnt_userns);
refcount_set(&idmap->count, 1);
ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
if (!ret)
ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
if (ret) {
free_mnt_idmap(idmap);
idmap = ERR_PTR(ret);
}
return idmap;
}
@ -234,9 +312,7 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
*/
void mnt_idmap_put(struct mnt_idmap *idmap)
{
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
put_user_ns(idmap->owner);
kfree(idmap);
}
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count))
free_mnt_idmap(idmap);
}
EXPORT_SYMBOL_GPL(mnt_idmap_put);

View File

@ -17,6 +17,7 @@
struct user_namespace;
extern struct user_namespace init_user_ns;
struct uid_gid_map;
typedef struct {
uid_t val;
@ -138,6 +139,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
return from_kgid(ns, gid) != (gid_t) -1;
}
u32 map_id_down(struct uid_gid_map *map, u32 id);
u32 map_id_up(struct uid_gid_map *map, u32 id);
#else
static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
@ -186,6 +190,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
return gid_valid(gid);
}
static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
{
return id;
}
static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
{
return id;
}
#endif /* CONFIG_USER_NS */
#endif /* _LINUX_UIDGID_H */

View File

@ -332,7 +332,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
return id;
}
static u32 map_id_down(struct uid_gid_map *map, u32 id)
u32 map_id_down(struct uid_gid_map *map, u32 id)
{
return map_id_range_down(map, id, 1);
}
@ -375,7 +375,7 @@ map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
sizeof(struct uid_gid_extent), cmp_map_id);
}
static u32 map_id_up(struct uid_gid_map *map, u32 id)
u32 map_id_up(struct uid_gid_map *map, u32 id)
{
struct uid_gid_extent *extent;
unsigned extents = map->nr_extents;