Merge branch 'udp-small-changes-on-receive-path'
Eric Dumazet says: ==================== udp: small changes on receive path This series is based on an observation I made in UDP receive path. The sock_def_readable() costs are pretty high, especially when epoll is used to generate EPOLLIN events. First patch annotates races on sk->sk_rcvbuf reads. Second patch replaces an atomic_add_return() with a less expensive atomic_add() Third patch avoids calling sock_def_readable() when possible. Fourth patch adds sk_wake_async_rcu() to get better inlining and code generation. ==================== Link: https://lore.kernel.org/r/20240328144032.1864988-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
9494dc0b08
@ -847,7 +847,7 @@ void af_alg_wmem_wakeup(struct sock *sk)
|
||||
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
|
||||
EPOLLRDNORM |
|
||||
EPOLLRDBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
|
||||
@ -914,7 +914,7 @@ static void af_alg_data_wakeup(struct sock *sk)
|
||||
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
|
||||
EPOLLRDNORM |
|
||||
EPOLLRDBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -2513,6 +2513,12 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band)
|
||||
{
|
||||
if (unlikely(sock_flag(sk, SOCK_FASYNC)))
|
||||
sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
|
||||
}
|
||||
|
||||
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
|
||||
* need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak.
|
||||
* Note: for send buffers, TCP works better if we can build two skbs at
|
||||
|
@ -116,7 +116,7 @@ static void vcc_write_space(struct sock *sk)
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible(&wq->wait);
|
||||
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
@ -3338,7 +3338,7 @@ static void sock_def_error_report(struct sock *sk)
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible_poll(&wq->wait, EPOLLERR);
|
||||
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@ -3353,7 +3353,7 @@ void sock_def_readable(struct sock *sk)
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
|
||||
EPOLLRDNORM | EPOLLRDBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@ -3373,7 +3373,7 @@ static void sock_def_write_space(struct sock *sk)
|
||||
EPOLLWRNORM | EPOLLWRBAND);
|
||||
|
||||
/* Should agree with poll, otherwise some programs break */
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
@ -3398,7 +3398,7 @@ static void sock_def_write_space_wfree(struct sock *sk)
|
||||
EPOLLWRNORM | EPOLLWRBAND);
|
||||
|
||||
/* Should agree with poll, otherwise some programs break */
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -204,7 +204,7 @@ void dccp_write_space(struct sock *sk)
|
||||
wake_up_interruptible(&wq->wait);
|
||||
/* Should agree with poll, otherwise some programs break */
|
||||
if (sock_writeable(sk))
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -1492,13 +1492,15 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
|
||||
struct sk_buff_head *list = &sk->sk_receive_queue;
|
||||
int rmem, err = -ENOMEM;
|
||||
spinlock_t *busy = NULL;
|
||||
int size;
|
||||
bool becomes_readable;
|
||||
int size, rcvbuf;
|
||||
|
||||
/* try to avoid the costly atomic add/sub pair when the receive
|
||||
* queue is full; always allow at least a packet
|
||||
/* Immediately drop when the receive queue is full.
|
||||
* Always allow at least one packet.
|
||||
*/
|
||||
rmem = atomic_read(&sk->sk_rmem_alloc);
|
||||
if (rmem > sk->sk_rcvbuf)
|
||||
rcvbuf = READ_ONCE(sk->sk_rcvbuf);
|
||||
if (rmem > rcvbuf)
|
||||
goto drop;
|
||||
|
||||
/* Under mem pressure, it might be helpful to help udp_recvmsg()
|
||||
@ -1507,7 +1509,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
|
||||
* - Less cache line misses at copyout() time
|
||||
* - Less work at consume_skb() (less alien page frag freeing)
|
||||
*/
|
||||
if (rmem > (sk->sk_rcvbuf >> 1)) {
|
||||
if (rmem > (rcvbuf >> 1)) {
|
||||
skb_condense(skb);
|
||||
|
||||
busy = busylock_acquire(sk);
|
||||
@ -1515,12 +1517,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
|
||||
size = skb->truesize;
|
||||
udp_set_dev_scratch(skb);
|
||||
|
||||
/* we drop only if the receive buf is full and the receive
|
||||
* queue contains some other skb
|
||||
*/
|
||||
rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
|
||||
if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
|
||||
goto uncharge_drop;
|
||||
atomic_add(size, &sk->sk_rmem_alloc);
|
||||
|
||||
spin_lock(&list->lock);
|
||||
err = udp_rmem_schedule(sk, size);
|
||||
@ -1536,12 +1533,19 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
|
||||
*/
|
||||
sock_skb_set_dropcount(sk, skb);
|
||||
|
||||
becomes_readable = skb_queue_empty(list);
|
||||
__skb_queue_tail(list, skb);
|
||||
spin_unlock(&list->lock);
|
||||
|
||||
if (!sock_flag(sk, SOCK_DEAD))
|
||||
INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
|
||||
|
||||
if (!sock_flag(sk, SOCK_DEAD)) {
|
||||
if (becomes_readable ||
|
||||
sk->sk_data_ready != sock_def_readable ||
|
||||
READ_ONCE(sk->sk_peek_off) >= 0)
|
||||
INDIRECT_CALL_1(sk->sk_data_ready,
|
||||
sock_def_readable, sk);
|
||||
else
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
}
|
||||
busylock_release(busy);
|
||||
return 0;
|
||||
|
||||
|
@ -184,7 +184,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible_all(&wq->wait);
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
|
||||
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible(&wq->wait);
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -9276,7 +9276,7 @@ void sctp_data_ready(struct sock *sk)
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
|
||||
EPOLLRDNORM | EPOLLRDBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -42,10 +42,10 @@ static void smc_rx_wake_up(struct sock *sk)
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
|
||||
EPOLLRDNORM | EPOLLRDBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
|
||||
(sk->sk_state == SMC_CLOSED))
|
||||
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_HUP);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -546,7 +546,7 @@ static void unix_write_space(struct sock *sk)
|
||||
if (skwq_has_sleeper(wq))
|
||||
wake_up_interruptible_sync_poll(&wq->wait,
|
||||
EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user