net: dqs: make struct dql more cache efficient
With the previous change, struct dql->stall_thrs will be in the hot path (at queue side), even if DQS is disabled. The other fields accessed in this function (last_obj_cnt and num_queued) are in the first cache line, so let's move this field (stall_thrs) to the very first cache line, since there is a hole there. This does not change the structure size, since it moves a short (2 bytes) into a 4-byte hole in the first cache line. This is the new structure format now: struct dql { unsigned int num_queued; unsigned int last_obj_cnt; ... short unsigned int stall_thrs; /* XXX 2 bytes hole, try to pack */ ... /* --- cacheline 1 boundary (64 bytes) --- */ ... /* Longest stall detected, reported to user */ short unsigned int stall_max; /* XXX 2 bytes hole, try to pack */ }; Also, read stall_thrs (now in the very first cache line) earlier, together with dql->num_queued (also in the first cache line). Suggested-by: Jakub Kicinski <kuba@kernel.org> Suggested-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Breno Leitao <leitao@debian.org> Link: https://lore.kernel.org/r/20240411192241.2498631-5-leitao@debian.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
721f076b62
commit
4ba67ef3a1
@ -50,6 +50,9 @@ struct dql {
|
||||
unsigned int adj_limit; /* limit + num_completed */
|
||||
unsigned int last_obj_cnt; /* Count at last queuing */
|
||||
|
||||
/* Stall threshold (in jiffies), defined by user */
|
||||
unsigned short stall_thrs;
|
||||
|
||||
unsigned long history_head; /* top 58 bits of jiffies */
|
||||
/* stall entries, a bit per entry */
|
||||
unsigned long history[DQL_HIST_LEN];
|
||||
@ -71,8 +74,6 @@ struct dql {
|
||||
unsigned int min_limit; /* Minimum limit */
|
||||
unsigned int slack_hold_time; /* Time to measure slack */
|
||||
|
||||
/* Stall threshold (in jiffies), defined by user */
|
||||
unsigned short stall_thrs;
|
||||
/* Longest stall detected, reported to user */
|
||||
unsigned short stall_max;
|
||||
unsigned long last_reap; /* Last reap (in jiffies) */
|
||||
|
@ -15,12 +15,10 @@
|
||||
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
|
||||
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
|
||||
|
||||
static void dql_check_stall(struct dql *dql)
|
||||
static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
|
||||
{
|
||||
unsigned short stall_thrs;
|
||||
unsigned long now;
|
||||
|
||||
stall_thrs = READ_ONCE(dql->stall_thrs);
|
||||
if (!stall_thrs)
|
||||
return;
|
||||
|
||||
@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count)
|
||||
{
|
||||
unsigned int inprogress, prev_inprogress, limit;
|
||||
unsigned int ovlimit, completed, num_queued;
|
||||
unsigned short stall_thrs;
|
||||
bool all_prev_completed;
|
||||
|
||||
num_queued = READ_ONCE(dql->num_queued);
|
||||
/* Read stall_thrs in advance since it belongs to the same (first)
|
||||
* cache line as ->num_queued. This way, dql_check_stall() does not
|
||||
* need to touch the first cache line again later, reducing the window
|
||||
* of possible false sharing.
|
||||
*/
|
||||
stall_thrs = READ_ONCE(dql->stall_thrs);
|
||||
|
||||
/* Can't complete more than what's in queue */
|
||||
BUG_ON(count > num_queued - dql->num_completed);
|
||||
@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count)
|
||||
dql->num_completed = completed;
|
||||
dql->prev_num_queued = num_queued;
|
||||
|
||||
dql_check_stall(dql);
|
||||
dql_check_stall(dql, stall_thrs);
|
||||
}
|
||||
EXPORT_SYMBOL(dql_completed);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user