bcachefs: BTREE_ITER_WITH_KEY_CACHE
This is the start of cache coherency with the btree key cache - this adds a btree iterator flag that causes lookups to also check the key cache when we're iterating over the btree (not iterating over the key cache). Note that we could still race with another thread creating an item in the key cache and updating it, since we aren't holding the key cache locked if it wasn't found. The next patch for the update path will address this by causing the transaction to restart if the key cache is found to be dirty. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
45e4cd9e3a
commit
f7b6ca23b6
@ -1964,13 +1964,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
|
|||||||
|
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
|
|
||||||
BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
|
|
||||||
|
|
||||||
if (!path->cached) {
|
if (!path->cached) {
|
||||||
struct btree_path_level *l = path_l(path);
|
struct btree_path_level *l = path_l(path);
|
||||||
struct bkey_packed *_k =
|
struct bkey_packed *_k;
|
||||||
bch2_btree_node_iter_peek_all(&l->iter, l->b);
|
|
||||||
|
|
||||||
|
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
|
||||||
|
|
||||||
|
_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
|
||||||
k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
|
k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
|
||||||
|
|
||||||
EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
|
EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
|
||||||
@ -1980,12 +1980,15 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
|
|||||||
} else {
|
} else {
|
||||||
struct bkey_cached *ck = (void *) path->l[0].b;
|
struct bkey_cached *ck = (void *) path->l[0].b;
|
||||||
|
|
||||||
EBUG_ON(path->btree_id != ck->key.btree_id ||
|
EBUG_ON(ck &&
|
||||||
bkey_cmp(path->pos, ck->key.pos));
|
(path->btree_id != ck->key.btree_id ||
|
||||||
|
bkey_cmp(path->pos, ck->key.pos)));
|
||||||
|
|
||||||
/* BTREE_ITER_CACHED_NOFILL? */
|
/* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
|
||||||
if (unlikely(!ck->valid))
|
if (unlikely(!ck || !ck->valid))
|
||||||
goto hole;
|
return bkey_s_c_null;
|
||||||
|
|
||||||
|
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
|
||||||
|
|
||||||
*u = ck->k->k;
|
*u = ck->k->k;
|
||||||
k = bkey_i_to_s_c(ck->k);
|
k = bkey_i_to_s_c(ck->k);
|
||||||
@ -2233,11 +2236,43 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
|||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Checks btree key cache for key at pos and returns it if present, or
|
||||||
|
* bkey_s_c_null:
|
||||||
|
*/
|
||||||
|
static noinline
|
||||||
|
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
|
||||||
|
{
|
||||||
|
struct btree_trans *trans = iter->trans;
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
|
struct bkey u;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
|
||||||
|
return bkey_s_c_null;
|
||||||
|
|
||||||
|
if (!iter->key_cache_path)
|
||||||
|
iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
|
||||||
|
iter->flags & BTREE_ITER_INTENT, 0,
|
||||||
|
iter->flags|BTREE_ITER_CACHED);
|
||||||
|
|
||||||
|
iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
|
||||||
|
iter->flags & BTREE_ITER_INTENT);
|
||||||
|
|
||||||
|
ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
|
||||||
|
if (unlikely(ret))
|
||||||
|
return bkey_s_c_err(ret);
|
||||||
|
|
||||||
|
iter->key_cache_path->should_be_locked = true;
|
||||||
|
|
||||||
|
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
|
||||||
|
}
|
||||||
|
|
||||||
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
|
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
|
||||||
{
|
{
|
||||||
struct btree_trans *trans = iter->trans;
|
struct btree_trans *trans = iter->trans;
|
||||||
struct bkey_i *next_update;
|
struct bkey_i *next_update;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k, k2;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
EBUG_ON(iter->path->cached || iter->path->level);
|
EBUG_ON(iter->path->cached || iter->path->level);
|
||||||
@ -2255,8 +2290,24 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iter->path->should_be_locked = true;
|
||||||
|
|
||||||
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
||||||
|
|
||||||
|
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
|
||||||
|
k.k &&
|
||||||
|
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
|
||||||
|
ret = bkey_err(k2);
|
||||||
|
if (ret) {
|
||||||
|
k = k2;
|
||||||
|
bch2_btree_iter_set_pos(iter, iter->pos);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
k = k2;
|
||||||
|
iter->k = *k.k;
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
|
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
|
||||||
k = btree_trans_peek_journal(trans, iter, k);
|
k = btree_trans_peek_journal(trans, iter, k);
|
||||||
|
|
||||||
@ -2603,6 +2654,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
|||||||
(k = btree_trans_peek_slot_journal(trans, iter)).k)
|
(k = btree_trans_peek_slot_journal(trans, iter)).k)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
|
||||||
|
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
|
||||||
|
if (!bkey_err(k))
|
||||||
|
iter->k = *k.k;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
||||||
} else {
|
} else {
|
||||||
struct bpos next;
|
struct bpos next;
|
||||||
@ -2806,8 +2864,12 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
|
|||||||
if (iter->update_path)
|
if (iter->update_path)
|
||||||
bch2_path_put(trans, iter->update_path,
|
bch2_path_put(trans, iter->update_path,
|
||||||
iter->flags & BTREE_ITER_INTENT);
|
iter->flags & BTREE_ITER_INTENT);
|
||||||
|
if (iter->key_cache_path)
|
||||||
|
bch2_path_put(trans, iter->key_cache_path,
|
||||||
|
iter->flags & BTREE_ITER_INTENT);
|
||||||
iter->path = NULL;
|
iter->path = NULL;
|
||||||
iter->update_path = NULL;
|
iter->update_path = NULL;
|
||||||
|
iter->key_cache_path = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __bch2_trans_iter_init(struct btree_trans *trans,
|
static void __bch2_trans_iter_init(struct btree_trans *trans,
|
||||||
@ -2834,12 +2896,16 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
|
|||||||
if (trans->journal_replay_not_finished)
|
if (trans->journal_replay_not_finished)
|
||||||
flags |= BTREE_ITER_WITH_JOURNAL;
|
flags |= BTREE_ITER_WITH_JOURNAL;
|
||||||
|
|
||||||
if (!btree_id_cached(trans->c, btree_id))
|
if (!btree_id_cached(trans->c, btree_id)) {
|
||||||
flags &= ~BTREE_ITER_CACHED;
|
flags &= ~BTREE_ITER_CACHED;
|
||||||
|
flags &= ~BTREE_ITER_WITH_KEY_CACHE;
|
||||||
|
} else if (!(flags & BTREE_ITER_CACHED))
|
||||||
|
flags |= BTREE_ITER_WITH_KEY_CACHE;
|
||||||
|
|
||||||
iter->trans = trans;
|
iter->trans = trans;
|
||||||
iter->path = NULL;
|
iter->path = NULL;
|
||||||
iter->update_path = NULL;
|
iter->update_path = NULL;
|
||||||
|
iter->key_cache_path = NULL;
|
||||||
iter->btree_id = btree_id;
|
iter->btree_id = btree_id;
|
||||||
iter->min_depth = depth;
|
iter->min_depth = depth;
|
||||||
iter->flags = flags;
|
iter->flags = flags;
|
||||||
@ -2887,6 +2953,7 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
|
|||||||
__btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
|
__btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
|
||||||
if (src->update_path)
|
if (src->update_path)
|
||||||
__btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
|
__btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
|
||||||
|
dst->key_cache_path = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||||
|
@ -50,11 +50,6 @@ static inline struct btree *btree_node_parent(struct btree_path *path,
|
|||||||
return btree_path_node(path, b->c.level + 1);
|
return btree_path_node(path, b->c.level + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int btree_iter_err(const struct btree_iter *iter)
|
|
||||||
{
|
|
||||||
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Iterate over paths within a transaction: */
|
/* Iterate over paths within a transaction: */
|
||||||
|
|
||||||
void __bch2_btree_trans_sort_paths(struct btree_trans *);
|
void __bch2_btree_trans_sort_paths(struct btree_trans *);
|
||||||
|
@ -209,19 +209,20 @@ static int btree_key_cache_fill(struct btree_trans *trans,
|
|||||||
struct btree_path *ck_path,
|
struct btree_path *ck_path,
|
||||||
struct bkey_cached *ck)
|
struct bkey_cached *ck)
|
||||||
{
|
{
|
||||||
struct btree_iter iter;
|
struct btree_path *path;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
unsigned new_u64s = 0;
|
unsigned new_u64s = 0;
|
||||||
struct bkey_i *new_k = NULL;
|
struct bkey_i *new_k = NULL;
|
||||||
|
struct bkey u;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
bch2_trans_iter_init(trans, &iter, ck->key.btree_id,
|
path = bch2_path_get(trans, ck->key.btree_id, ck->key.pos, 0, 0, 0);
|
||||||
ck->key.pos, BTREE_ITER_SLOTS);
|
ret = bch2_btree_path_traverse(trans, path, 0);
|
||||||
k = bch2_btree_iter_peek_slot(&iter);
|
|
||||||
ret = bkey_err(k);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
k = bch2_btree_path_peek_slot(path, &u);
|
||||||
|
|
||||||
if (!bch2_btree_node_relock(trans, ck_path, 0)) {
|
if (!bch2_btree_node_relock(trans, ck_path, 0)) {
|
||||||
trace_trans_restart_relock_key_cache_fill(trans->fn,
|
trace_trans_restart_relock_key_cache_fill(trans->fn,
|
||||||
_THIS_IP_, ck_path->btree_id, &ck_path->pos);
|
_THIS_IP_, ck_path->btree_id, &ck_path->pos);
|
||||||
@ -262,9 +263,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
|
|||||||
bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
|
bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
|
||||||
|
|
||||||
/* We're not likely to need this iterator again: */
|
/* We're not likely to need this iterator again: */
|
||||||
set_btree_iter_dontneed(&iter);
|
path->preserve = false;
|
||||||
err:
|
err:
|
||||||
bch2_trans_iter_exit(trans, &iter);
|
bch2_path_put(trans, path, 0);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -385,6 +386,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
|||||||
BTREE_ITER_CACHED_NOFILL|
|
BTREE_ITER_CACHED_NOFILL|
|
||||||
BTREE_ITER_CACHED_NOCREATE|
|
BTREE_ITER_CACHED_NOCREATE|
|
||||||
BTREE_ITER_INTENT);
|
BTREE_ITER_INTENT);
|
||||||
|
b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
|
||||||
|
|
||||||
ret = bch2_btree_iter_traverse(&c_iter);
|
ret = bch2_btree_iter_traverse(&c_iter);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -202,10 +202,10 @@ struct btree_node_iter {
|
|||||||
*/
|
*/
|
||||||
#define BTREE_ITER_IS_EXTENTS (1 << 4)
|
#define BTREE_ITER_IS_EXTENTS (1 << 4)
|
||||||
#define BTREE_ITER_NOT_EXTENTS (1 << 5)
|
#define BTREE_ITER_NOT_EXTENTS (1 << 5)
|
||||||
#define BTREE_ITER_ERROR (1 << 6)
|
#define BTREE_ITER_CACHED (1 << 6)
|
||||||
#define BTREE_ITER_CACHED (1 << 7)
|
#define BTREE_ITER_CACHED_NOFILL (1 << 7)
|
||||||
#define BTREE_ITER_CACHED_NOFILL (1 << 8)
|
#define BTREE_ITER_CACHED_NOCREATE (1 << 8)
|
||||||
#define BTREE_ITER_CACHED_NOCREATE (1 << 9)
|
#define BTREE_ITER_WITH_KEY_CACHE (1 << 9)
|
||||||
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
||||||
#define BTREE_ITER_WITH_JOURNAL (1 << 11)
|
#define BTREE_ITER_WITH_JOURNAL (1 << 11)
|
||||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||||
@ -277,6 +277,7 @@ struct btree_iter {
|
|||||||
struct btree_trans *trans;
|
struct btree_trans *trans;
|
||||||
struct btree_path *path;
|
struct btree_path *path;
|
||||||
struct btree_path *update_path;
|
struct btree_path *update_path;
|
||||||
|
struct btree_path *key_cache_path;
|
||||||
|
|
||||||
enum btree_id btree_id:4;
|
enum btree_id btree_id:4;
|
||||||
unsigned min_depth:4;
|
unsigned min_depth:4;
|
||||||
|
Loading…
Reference in New Issue
Block a user