1

bcachefs: BTREE_ITER_WITH_KEY_CACHE

This is the start of cache coherency with the btree key cache - this
adds a btree iterator flag that causes lookups to also check the key
cache when we're iterating over the btree (not iterating over the key
cache).

Note that we could still race with another thread creating an item in
the key cache and updating it, since we aren't holding the key cache
locked if it wasn't found. The next patch for the update path will
address this by causing the transaction to restart if the key cache is
found to be dirty.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-02-06 23:15:12 -05:00 committed by Kent Overstreet
parent 45e4cd9e3a
commit f7b6ca23b6
4 changed files with 93 additions and 27 deletions

View File

@ -1964,13 +1964,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
struct bkey_s_c k; struct bkey_s_c k;
BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
if (!path->cached) { if (!path->cached) {
struct btree_path_level *l = path_l(path); struct btree_path_level *l = path_l(path);
struct bkey_packed *_k = struct bkey_packed *_k;
bch2_btree_node_iter_peek_all(&l->iter, l->b);
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0); EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
@ -1980,12 +1980,15 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
} else { } else {
struct bkey_cached *ck = (void *) path->l[0].b; struct bkey_cached *ck = (void *) path->l[0].b;
EBUG_ON(path->btree_id != ck->key.btree_id || EBUG_ON(ck &&
bkey_cmp(path->pos, ck->key.pos)); (path->btree_id != ck->key.btree_id ||
bkey_cmp(path->pos, ck->key.pos)));
/* BTREE_ITER_CACHED_NOFILL? */ /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
if (unlikely(!ck->valid)) if (unlikely(!ck || !ck->valid))
goto hole; return bkey_s_c_null;
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
*u = ck->k->k; *u = ck->k->k;
k = bkey_i_to_s_c(ck->k); k = bkey_i_to_s_c(ck->k);
@ -2233,11 +2236,43 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
return k; return k;
} }
/*
* Checks btree key cache for key at @pos (the caller-supplied position, which
* is not necessarily iter->pos) in iter->btree_id and returns it if present,
* or bkey_s_c_null.
*
* Returns:
*  - bkey_s_c_null if bch2_btree_key_cache_find() finds nothing at @pos;
*  - bkey_s_c_err(ret) if traversing the key cache path fails;
*  - otherwise whatever bch2_btree_path_peek_slot() returns for the key cache
*    path.
*
* Side effects: allocates iter->key_cache_path on first use, repositions it to
* @pos, and sets should_be_locked on it.
*/
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
/*
* Scratch key handed to bch2_btree_path_peek_slot().
* NOTE(review): this is a stack local; presumably the returned key never
* points at it because the path is a cached one - confirm.
*/
struct bkey u;
int ret;
/* Nothing in the key cache at @pos: bail out before touching any path */
if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
return bkey_s_c_null;
/* Lazily get the iterator's key cache path, requested with BTREE_ITER_CACHED */
if (!iter->key_cache_path)
iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
iter->flags & BTREE_ITER_INTENT, 0,
iter->flags|BTREE_ITER_CACHED);
/* set_pos returns a path pointer, so store it back into the iterator */
iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
iter->flags & BTREE_ITER_INTENT);
ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
if (unlikely(ret))
return bkey_s_c_err(ret);
iter->key_cache_path->should_be_locked = true;
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
}
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
{ {
struct btree_trans *trans = iter->trans; struct btree_trans *trans = iter->trans;
struct bkey_i *next_update; struct bkey_i *next_update;
struct bkey_s_c k; struct bkey_s_c k, k2;
int ret; int ret;
EBUG_ON(iter->path->cached || iter->path->level); EBUG_ON(iter->path->cached || iter->path->level);
@ -2255,8 +2290,24 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
goto out; goto out;
} }
iter->path->should_be_locked = true;
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
ret = bkey_err(k2);
if (ret) {
k = k2;
bch2_btree_iter_set_pos(iter, iter->pos);
goto out;
}
k = k2;
iter->k = *k.k;
}
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
k = btree_trans_peek_journal(trans, iter, k); k = btree_trans_peek_journal(trans, iter, k);
@ -2603,6 +2654,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
(k = btree_trans_peek_slot_journal(trans, iter)).k) (k = btree_trans_peek_slot_journal(trans, iter)).k)
goto out; goto out;
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
if (!bkey_err(k))
iter->k = *k.k;
goto out;
}
k = bch2_btree_path_peek_slot(iter->path, &iter->k); k = bch2_btree_path_peek_slot(iter->path, &iter->k);
} else { } else {
struct bpos next; struct bpos next;
@ -2806,8 +2864,12 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
if (iter->update_path) if (iter->update_path)
bch2_path_put(trans, iter->update_path, bch2_path_put(trans, iter->update_path,
iter->flags & BTREE_ITER_INTENT); iter->flags & BTREE_ITER_INTENT);
if (iter->key_cache_path)
bch2_path_put(trans, iter->key_cache_path,
iter->flags & BTREE_ITER_INTENT);
iter->path = NULL; iter->path = NULL;
iter->update_path = NULL; iter->update_path = NULL;
iter->key_cache_path = NULL;
} }
static void __bch2_trans_iter_init(struct btree_trans *trans, static void __bch2_trans_iter_init(struct btree_trans *trans,
@ -2834,12 +2896,16 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
if (trans->journal_replay_not_finished) if (trans->journal_replay_not_finished)
flags |= BTREE_ITER_WITH_JOURNAL; flags |= BTREE_ITER_WITH_JOURNAL;
if (!btree_id_cached(trans->c, btree_id)) if (!btree_id_cached(trans->c, btree_id)) {
flags &= ~BTREE_ITER_CACHED; flags &= ~BTREE_ITER_CACHED;
flags &= ~BTREE_ITER_WITH_KEY_CACHE;
} else if (!(flags & BTREE_ITER_CACHED))
flags |= BTREE_ITER_WITH_KEY_CACHE;
iter->trans = trans; iter->trans = trans;
iter->path = NULL; iter->path = NULL;
iter->update_path = NULL; iter->update_path = NULL;
iter->key_cache_path = NULL;
iter->btree_id = btree_id; iter->btree_id = btree_id;
iter->min_depth = depth; iter->min_depth = depth;
iter->flags = flags; iter->flags = flags;
@ -2887,6 +2953,7 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
__btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
if (src->update_path) if (src->update_path)
__btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT); __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
dst->key_cache_path = NULL;
} }
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)

View File

@ -50,11 +50,6 @@ static inline struct btree *btree_node_parent(struct btree_path *path,
return btree_path_node(path, b->c.level + 1); return btree_path_node(path, b->c.level + 1);
} }
/* Returns -EIO if BTREE_ITER_ERROR is set in iter->flags, 0 otherwise. */
static inline int btree_iter_err(const struct btree_iter *iter)
{
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
}
/* Iterate over paths within a transaction: */ /* Iterate over paths within a transaction: */
void __bch2_btree_trans_sort_paths(struct btree_trans *); void __bch2_btree_trans_sort_paths(struct btree_trans *);

View File

@ -209,19 +209,20 @@ static int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path, struct btree_path *ck_path,
struct bkey_cached *ck) struct bkey_cached *ck)
{ {
struct btree_iter iter; struct btree_path *path;
struct bkey_s_c k; struct bkey_s_c k;
unsigned new_u64s = 0; unsigned new_u64s = 0;
struct bkey_i *new_k = NULL; struct bkey_i *new_k = NULL;
struct bkey u;
int ret; int ret;
bch2_trans_iter_init(trans, &iter, ck->key.btree_id, path = bch2_path_get(trans, ck->key.btree_id, ck->key.pos, 0, 0, 0);
ck->key.pos, BTREE_ITER_SLOTS); ret = bch2_btree_path_traverse(trans, path, 0);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret) if (ret)
goto err; goto err;
k = bch2_btree_path_peek_slot(path, &u);
if (!bch2_btree_node_relock(trans, ck_path, 0)) { if (!bch2_btree_node_relock(trans, ck_path, 0)) {
trace_trans_restart_relock_key_cache_fill(trans->fn, trace_trans_restart_relock_key_cache_fill(trans->fn,
_THIS_IP_, ck_path->btree_id, &ck_path->pos); _THIS_IP_, ck_path->btree_id, &ck_path->pos);
@ -262,9 +263,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
/* We're not likely to need this iterator again: */ /* We're not likely to need this iterator again: */
set_btree_iter_dontneed(&iter); path->preserve = false;
err: err:
bch2_trans_iter_exit(trans, &iter); bch2_path_put(trans, path, 0);
return ret; return ret;
} }
@ -385,6 +386,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_ITER_CACHED_NOFILL| BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_CACHED_NOCREATE| BTREE_ITER_CACHED_NOCREATE|
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
ret = bch2_btree_iter_traverse(&c_iter); ret = bch2_btree_iter_traverse(&c_iter);
if (ret) if (ret)
goto out; goto out;

View File

@ -202,10 +202,10 @@ struct btree_node_iter {
*/ */
#define BTREE_ITER_IS_EXTENTS (1 << 4) #define BTREE_ITER_IS_EXTENTS (1 << 4)
#define BTREE_ITER_NOT_EXTENTS (1 << 5) #define BTREE_ITER_NOT_EXTENTS (1 << 5)
#define BTREE_ITER_ERROR (1 << 6) #define BTREE_ITER_CACHED (1 << 6)
#define BTREE_ITER_CACHED (1 << 7) #define BTREE_ITER_CACHED_NOFILL (1 << 7)
#define BTREE_ITER_CACHED_NOFILL (1 << 8) #define BTREE_ITER_CACHED_NOCREATE (1 << 8)
#define BTREE_ITER_CACHED_NOCREATE (1 << 9) #define BTREE_ITER_WITH_KEY_CACHE (1 << 9)
#define BTREE_ITER_WITH_UPDATES (1 << 10) #define BTREE_ITER_WITH_UPDATES (1 << 10)
#define BTREE_ITER_WITH_JOURNAL (1 << 11) #define BTREE_ITER_WITH_JOURNAL (1 << 11)
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12) #define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
@ -277,6 +277,7 @@ struct btree_iter {
struct btree_trans *trans; struct btree_trans *trans;
struct btree_path *path; struct btree_path *path;
struct btree_path *update_path; struct btree_path *update_path;
struct btree_path *key_cache_path;
enum btree_id btree_id:4; enum btree_id btree_id:4;
unsigned min_depth:4; unsigned min_depth:4;