1

bcachefs: Improve trans_restart_split_race tracepoint

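We're seeing occasional test failures where we get stuck in a livelock that
involves this event. Pass the btree node to the tracepoint so that it also
reports the node's level, written/total blocks and remaining u64s, which will
help track it down.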

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-03-30 16:04:02 -04:00
parent 25d8f40560
commit 3d86f13df6
3 changed files with 32 additions and 5 deletions

View File

@ -1680,7 +1680,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
if (!(local_clock() & 63))
if ((local_clock() & 63) == 63)
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
ret = bch2_btree_node_lock_write(trans, path, &b->c);
@ -1720,7 +1720,7 @@ split:
* bch2_btree_path_upgrade() and allocating more nodes:
*/
if (b->c.level >= as->update_level) {
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_);
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
}

View File

@ -2,8 +2,10 @@
#include "bcachefs.h"
#include "alloc_types.h"
#include "buckets.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "keylist.h"
#include "opts.h"
#include "six.h"

View File

@ -831,10 +831,35 @@ DEFINE_EVENT(transaction_event, trans_restart_injected,
TP_ARGS(trans, caller_ip)
);
DEFINE_EVENT(transaction_event, trans_restart_split_race,
TRACE_EVENT(trans_restart_split_race,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
unsigned long caller_ip,
struct btree *b),
TP_ARGS(trans, caller_ip, b),
TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__field(unsigned long, caller_ip )
__field(u8, level )
__field(u16, written )
__field(u16, blocks )
__field(u16, u64s_remaining )
),
TP_fast_assign(
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
__entry->caller_ip = caller_ip;
__entry->level = b->c.level;
__entry->written = b->written;
__entry->blocks = btree_blocks(trans->c);
__entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b);
),
TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
__entry->trans_fn, (void *) __entry->caller_ip,
__entry->level,
__entry->written, __entry->blocks,
__entry->u64s_remaining)
);
DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim,