1
linux/net/ceph/striper.c
Ilya Dryomov 22e8bd51bb rbd: support for object-map and fast-diff
Speed up reads, discards and zeroouts through RBD_OBJ_FLAG_MAY_EXIST
and RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT based on object map.

Invalid object maps are not trusted, but still updated.  Note that we
never iterate, resize or invalidate object maps.  If object-map feature
is enabled but object map fails to load, we just fail the requester
(either "rbd map" or I/O, by way of post-acquire action).

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2019-07-08 14:01:45 +02:00

279 lines
7.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/ceph/ceph_debug.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <linux/ceph/striper.h>
#include <linux/ceph/types.h>
/*
 * Map the start of a file extent to a stripe unit within an object.
 *
 * Only the stripe unit that contains @off is looked at.  On return,
 * *@objno is the object that stripe unit belongs to, *@objoff is the
 * position of @off inside that object and *@xlen is the number of bytes
 * mapped: @len clamped to what is left of the stripe unit, hence never
 * more than @l->stripe_unit.
 *
 * Example for stripe_count = 3, stripes_per_object = 4:
 *
 * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
 * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
 * stripepos |      0      |      1      |      2      |      0      |      1
 * objno     |      0      |      1      |      2      |      3      |      4
 * objsetno  |                    0                    |                    1
 */
void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
				   u64 off, u64 len,
				   u64 *objno, u64 *objoff, u32 *xlen)
{
	u32 stripes_per_object = l->object_size / l->stripe_unit;
	u32 blockoff;	/* byte offset into the stripe unit */
	u32 stripepos;	/* column of the su within its stripe, which is
			   also the object's index in the object set */
	u32 objsetpos;	/* row of the stripe within the object set */
	/* stripe unit index, counted over the whole file */
	u64 blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
	/* stripe index, counted over the whole file */
	u64 stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
	/* object set index */
	u64 objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);

	/* every object set holds stripe_count objects */
	*objno = objsetno * l->stripe_count + stripepos;
	/* every stripe contributes one stripe unit to each object */
	*objoff = objsetpos * l->stripe_unit + blockoff;
	/* never cross the end of the stripe unit */
	*xlen = min_t(u64, len, l->stripe_unit - blockoff);
}
EXPORT_SYMBOL(ceph_calc_file_object_mapping);
/*
 * Find the last extent for @objno in @object_extents, which is sorted
 * by objno.
 *
 * The list is walked from the tail towards the head.  If an extent with
 * a matching objno is met it is returned and @add_pos is left untouched.
 * Otherwise NULL is returned and *@add_pos is set so that a new extent
 * can be inserted with list_add(new_ex, *add_pos): it is either the last
 * extent with a smaller objno or, if there is none, the list head.
 */
static struct ceph_object_extent *
lookup_last(struct list_head *object_extents, u64 objno,
	    struct list_head **add_pos)
{
	struct ceph_object_extent *cand;
	struct list_head *cur;

	list_for_each_prev(cur, object_extents) {
		cand = list_entry(cur, typeof(*cand), oe_item);

		/* still among the objects that sort after @objno */
		if (cand->oe_objno > objno)
			continue;

		if (cand->oe_objno == objno)
			return cand;

		/* went past where @objno would be: insert after @cur */
		break;
	}

	*add_pos = cur;
	return NULL;
}
/*
 * Find the extent of object @objno that fully covers @objoff~@xlen.
 *
 * @object_extents is walked from the head and the walk stops at the
 * first extent whose objno is greater than @objno.  Returns NULL if no
 * covering extent is found.
 */
static struct ceph_object_extent *
lookup_containing(struct list_head *object_extents, u64 objno,
		  u64 objoff, u32 xlen)
{
	struct ceph_object_extent *cur;

	list_for_each_entry(cur, object_extents, oe_item) {
		/* past @objno, nothing further can match */
		if (cur->oe_objno > objno)
			break;

		if (cur->oe_objno < objno)
			continue;

		/* right object: is the requested range inside this extent? */
		if (objoff >= cur->oe_off &&
		    objoff + xlen <= cur->oe_off + cur->oe_len) /* paranoia */
			return cur;
	}

	return NULL;
}
/*
 * Map a file extent to a sorted list of object extents.
 *
 * The aim is one object extent per object, or as few as possible: a
 * stripe unit that starts exactly where the last extent of its object
 * ends is merged into that extent.  As a result, a returned object
 * extent may reverse map to several different file extents.
 *
 * @alloc_fn is called with @alloc_arg for each new object extent.
 * @action_fn, if not NULL, is called with @action_arg for each mapped
 * stripe unit, whether it was merged into an already allocated object
 * extent or started a new one.
 *
 * Newly allocated object extents are linked into @object_extents.
 * To keep @object_extents sorted, successive calls to this function
 * must map successive file extents (i.e. the list of file extents that
 * are mapped using the same @object_extents must be sorted).
 *
 * Returns 0 on success, -ENOMEM if @alloc_fn returns NULL, or -EINVAL
 * (after a WARN) if the resulting list is not strictly ordered.
 *
 * The caller is responsible for @object_extents in every case: extents
 * linked in before a failure are left on the list.
 */
int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
			 struct list_head *object_extents,
			 struct ceph_object_extent *alloc_fn(void *arg),
			 void *alloc_arg,
			 ceph_object_extent_fn_t action_fn,
			 void *action_arg)
{
	struct ceph_object_extent *prev, *cur;

	while (len) {
		struct list_head *add_pos = NULL;
		struct ceph_object_extent *tail;
		u64 objno, objoff;
		u32 xlen;

		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
					      &xlen);

		tail = lookup_last(object_extents, objno, &add_pos);
		if (tail && tail->oe_off + tail->oe_len == objoff) {
			/* contiguous with the last extent of this object */
			tail->oe_len += xlen;
			if (action_fn)
				action_fn(tail, xlen, action_arg);
		} else {
			cur = alloc_fn(alloc_arg);
			if (!cur)
				return -ENOMEM;

			cur->oe_objno = objno;
			cur->oe_off = objoff;
			cur->oe_len = xlen;
			if (action_fn)
				action_fn(cur, xlen, action_arg);

			/*
			 * Link right after the last extent of the same
			 * object, or at the position lookup_last() chose
			 * if this object has no extent yet.
			 */
			list_add(&cur->oe_item,
				 tail ? &tail->oe_item : add_pos);
		}

		off += xlen;
		len -= xlen;
	}

	/*
	 * Sanity pass over neighbouring pairs: objno must not decrease
	 * and, within one object, each extent must start strictly after
	 * the previous one ends.
	 */
	prev = list_first_entry(object_extents, typeof(*prev), oe_item);
	cur = list_next_entry(prev, oe_item);
	while (&cur->oe_item != object_extents) {
		if (prev->oe_objno > cur->oe_objno ||
		    (prev->oe_objno == cur->oe_objno &&
		     prev->oe_off + prev->oe_len >= cur->oe_off)) {
			WARN(1, "%s: object_extents list not sorted!\n",
			     __func__);
			return -EINVAL;
		}

		prev = cur;
		cur = list_next_entry(cur, oe_item);
	}

	return 0;
}
EXPORT_SYMBOL(ceph_file_to_extents);
/*
 * A stripped down, non-allocating version of ceph_file_to_extents(),
 * for when @object_extents is already populated.
 *
 * The file extent @off~@len is walked one stripe unit at a time.  For
 * each piece the object extent that contains it is looked up in
 * @object_extents and @action_fn is called on it with the length of
 * the piece and @action_arg.  @action_fn is called unconditionally, so
 * it must not be NULL.
 *
 * Returns 0 on success, or -EINVAL (after a WARN) if some piece is not
 * covered by any extent on the list.
 */
int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
			 struct list_head *object_extents,
			 ceph_object_extent_fn_t action_fn,
			 void *action_arg)
{
	struct ceph_object_extent *hit;
	u64 objno, objoff;
	u32 xlen;

	for (; len; off += xlen, len -= xlen) {
		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
					      &xlen);

		hit = lookup_containing(object_extents, objno, objoff, xlen);
		if (WARN(!hit, "%s: objno %llu %llu~%u not found!\n",
			 __func__, objno, objoff, xlen))
			return -EINVAL;

		action_fn(hit, xlen, action_arg);
	}

	return 0;
}
EXPORT_SYMBOL(ceph_iterate_extents);
/*
 * Reverse map an object extent to a sorted list of file extents.
 *
 * The object extent @objoff~@objlen of object @objno is cut at stripe
 * unit boundaries and each piece is translated to its offset in the
 * file.  The pieces are stored, in order of increasing object offset,
 * in an array allocated with kmalloc_array(GFP_NOIO).
 *
 * If @objlen is 0, *@file_extents is set to NULL and *@num_file_extents
 * to 0, and 0 is returned.
 *
 * Returns 0 on success or -ENOMEM if the array cannot be allocated, in
 * which case *@file_extents is NULL and *@num_file_extents is 0.
 *
 * On success, the caller is responsible for:
 *
 *     kfree(*file_extents)
 */
int ceph_extent_to_file(struct ceph_file_layout *l,
			u64 objno, u64 objoff, u64 objlen,
			struct ceph_file_extent **file_extents,
			u32 *num_file_extents)
{
	u32 stripes_per_object = l->object_size / l->stripe_unit;
	u64 blockno;	/* which su */
	u32 blockoff;	/* offset into su */
	u64 stripeno;	/* which stripe */
	u32 stripepos;	/* which su in the stripe,
			   which object in the object set */
	u64 objsetno;	/* which object set */
	u64 objset_stripeno;	/* first stripe of the object set */
	u32 i = 0;

	if (!objlen) {
		*file_extents = NULL;
		*num_file_extents = 0;
		return 0;
	}

	/* one file extent per stripe unit touched by objoff~objlen */
	*num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
				     DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
	*file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
				      GFP_NOIO);
	if (!*file_extents) {
		/* don't leave a non-zero count paired with a NULL array */
		*num_file_extents = 0;
		return -ENOMEM;
	}

	/*
	 * These depend only on @objno and the layout, so compute them
	 * once instead of redoing the division for every stripe unit.
	 */
	objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
	objset_stripeno = objsetno * stripes_per_object;

	/* only the first piece can start in the middle of a stripe unit */
	div_u64_rem(objoff, l->stripe_unit, &blockoff);
	while (objlen) {
		u64 off, len;

		stripeno = div_u64(objoff, l->stripe_unit) + objset_stripeno;
		blockno = stripeno * l->stripe_count + stripepos;
		off = blockno * l->stripe_unit + blockoff;
		len = min_t(u64, objlen, l->stripe_unit - blockoff);

		(*file_extents)[i].fe_off = off;
		(*file_extents)[i].fe_len = len;

		blockoff = 0;
		objoff += len;
		objlen -= len;
		i++;
	}

	/* the loop must have filled exactly the precomputed count */
	BUG_ON(i != *num_file_extents);
	return 0;
}
EXPORT_SYMBOL(ceph_extent_to_file);
/*
 * Compute an object count for @size bytes laid out according to @l.
 *
 * A period is stripe_count * object_size bytes.  Every period, the
 * trailing partial one included, is first counted as stripe_count
 * objects.  If the trailing partial period is shorter than one full
 * stripe (stripe_count * stripe_unit bytes), the objects that its
 * stripe units do not reach are then subtracted.
 */
u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
{
	u64 period = (u64)l->stripe_count * l->object_size;
	u64 stripe_bytes = (u64)l->stripe_count * l->stripe_unit;
	u64 num_objs = DIV64_U64_ROUND_UP(size, period) * l->stripe_count;
	u64 tail_bytes;	/* bytes in the trailing partial period */

	div64_u64_rem(size, period, &tail_bytes);

	/* no partial period, or it touches every object of its set */
	if (!tail_bytes || tail_bytes >= stripe_bytes)
		return num_objs;

	/* drop the objects past the last stripe unit that is in use */
	return num_objs - (l->stripe_count -
			   DIV_ROUND_UP_ULL(tail_bytes, l->stripe_unit));
}
EXPORT_SYMBOL(ceph_get_num_objects);