block: support to account io_ticks precisely
Currently, io_ticks is accounted based on sampling: specifically, update_io_ticks() always accounts io_ticks by 1 jiffy from bdev_start_io_acct()/blk_account_io_start(), and the result can be inaccurate. For example (HZ is 250): Test script: fio -filename=/dev/sda -bs=4k -rw=write -direct=1 -name=test -thinktime=4ms Test result: util is about 90%, while the disk is really idle. This behaviour was introduced by commit 5b18b5a737
("block: delete part_round_stats and switch to less precise counting"); however, a key point was missed: that patch also improved performance a lot. Before the commit: part_round_stats: if (part->stamp != now) stats |= 1; part_in_flight() -> there can be lots of tasks here in 1 jiffy. part_round_stats_single() __part_stat_add() part->stamp = now; After the commit: update_io_ticks: stamp = part->bd_stamp; if (time_after(now, stamp)) if (try_cmpxchg()) __part_stat_add() -> only one task can reach here in 1 jiffy. Hence, in order to account io_ticks precisely, we only need to check whether there is IO in flight at most once per jiffy. Note that for rq-based devices, iterating tags should not be used here because 'tags->lock' is grabbed in blk_mq_find_and_get_req(); hence part_stat_local_inc/dec() and part_in_flight() are used to track in-flight IO. The additional overhead is quite small: - per-cpu add/dec for each IO for rq-based devices; - per-cpu sum once per jiffy. It has been verified with null-blk that there is no performance degradation under heavy IO pressure. Fixes: 5b18b5a737
("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Yu Kuai <yukuai3@huawei.com> Link: https://lore.kernel.org/r/20240509123717.3223892-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
060406c61c
commit
99dc422335
@ -984,10 +984,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
|
||||
unsigned long stamp;
|
||||
again:
|
||||
stamp = READ_ONCE(part->bd_stamp);
|
||||
if (unlikely(time_after(now, stamp))) {
|
||||
if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
|
||||
__part_stat_add(part, io_ticks, end ? now - stamp : 1);
|
||||
}
|
||||
if (unlikely(time_after(now, stamp)) &&
|
||||
likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
|
||||
(end || part_in_flight(part)))
|
||||
__part_stat_add(part, io_ticks, now - stamp);
|
||||
|
||||
if (part->bd_partno) {
|
||||
part = bdev_whole(part);
|
||||
goto again;
|
||||
|
@ -780,6 +780,8 @@ static void blk_account_io_merge_request(struct request *req)
|
||||
if (blk_do_io_stat(req)) {
|
||||
part_stat_lock();
|
||||
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
||||
part_stat_local_dec(req->part,
|
||||
in_flight[op_is_write(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
|
@ -996,6 +996,8 @@ static inline void blk_account_io_done(struct request *req, u64 now)
|
||||
update_io_ticks(req->part, jiffies, true);
|
||||
part_stat_inc(req->part, ios[sgrp]);
|
||||
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
|
||||
part_stat_local_dec(req->part,
|
||||
in_flight[op_is_write(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
@ -1018,6 +1020,8 @@ static inline void blk_account_io_start(struct request *req)
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, false);
|
||||
part_stat_local_inc(req->part,
|
||||
in_flight[op_is_write(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
|
@ -366,6 +366,7 @@ static inline bool blk_do_io_stat(struct request *rq)
|
||||
}
|
||||
|
||||
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
|
||||
unsigned int part_in_flight(struct block_device *part);
|
||||
|
||||
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
|
||||
{
|
||||
|
@ -118,7 +118,7 @@ static void part_stat_read_all(struct block_device *part,
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int part_in_flight(struct block_device *part)
|
||||
unsigned int part_in_flight(struct block_device *part)
|
||||
{
|
||||
unsigned int inflight = 0;
|
||||
int cpu;
|
||||
|
Loading…
Reference in New Issue
Block a user