1

Merge branch 'udp-small-changes-on-receive-path'

Eric Dumazet says:

====================
udp: small changes on receive path

This series is based on an observation I made in UDP receive path.

The sock_def_readable() costs are pretty high, especially when
epoll is used to generate EPOLLIN events.

First patch annotates races on sk->sk_rcvbuf reads.

Second patch replaces an atomic_add_return()
 with a less expensive atomic_add()

Third patch avoids calling sock_def_readable() when possible.

Fourth patch adds sk_wake_async_rcu() to get better inlining
and code generation.
====================

Link: https://lore.kernel.org/r/20240328144032.1864988-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-03-29 15:03:14 -07:00
commit 9494dc0b08
11 changed files with 38 additions and 28 deletions

View File

@ -847,7 +847,7 @@ void af_alg_wmem_wakeup(struct sock *sk)
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
EPOLLRDNORM |
EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
@ -914,7 +914,7 @@ static void af_alg_data_wakeup(struct sock *sk)
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
EPOLLRDNORM |
EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}

View File

@ -2513,6 +2513,12 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
}
}
static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band)
{
if (unlikely(sock_flag(sk, SOCK_FASYNC)))
sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
}
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
* need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak.
* Note: for send buffers, TCP works better if we can build two skbs at

View File

@ -116,7 +116,7 @@ static void vcc_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();

View File

@ -3338,7 +3338,7 @@ static void sock_def_error_report(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait, EPOLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR);
rcu_read_unlock();
}
@ -3353,7 +3353,7 @@ void sock_def_readable(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@ -3373,7 +3373,7 @@ static void sock_def_write_space(struct sock *sk)
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
@ -3398,7 +3398,7 @@ static void sock_def_write_space_wfree(struct sock *sk)
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
}

View File

@ -204,7 +204,7 @@ void dccp_write_space(struct sock *sk)
wake_up_interruptible(&wq->wait);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}

View File

@ -1492,13 +1492,15 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
int size;
bool becomes_readable;
int size, rcvbuf;
/* try to avoid the costly atomic add/sub pair when the receive
* queue is full; always allow at least a packet
/* Immediately drop when the receive queue is full.
* Always allow at least one packet.
*/
rmem = atomic_read(&sk->sk_rmem_alloc);
if (rmem > sk->sk_rcvbuf)
rcvbuf = READ_ONCE(sk->sk_rcvbuf);
if (rmem > rcvbuf)
goto drop;
/* Under mem pressure, it might be helpful to help udp_recvmsg()
@ -1507,7 +1509,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* - Less cache line misses at copyout() time
* - Less work at consume_skb() (less alien page frag freeing)
*/
if (rmem > (sk->sk_rcvbuf >> 1)) {
if (rmem > (rcvbuf >> 1)) {
skb_condense(skb);
busy = busylock_acquire(sk);
@ -1515,12 +1517,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
size = skb->truesize;
udp_set_dev_scratch(skb);
/* we drop only if the receive buf is full and the receive
* queue contains some other skb
*/
rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
goto uncharge_drop;
atomic_add(size, &sk->sk_rmem_alloc);
spin_lock(&list->lock);
err = udp_rmem_schedule(sk, size);
@ -1536,12 +1533,19 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
sock_skb_set_dropcount(sk, skb);
becomes_readable = skb_queue_empty(list);
__skb_queue_tail(list, skb);
spin_unlock(&list->lock);
if (!sock_flag(sk, SOCK_DEAD))
INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
if (!sock_flag(sk, SOCK_DEAD)) {
if (becomes_readable ||
sk->sk_data_ready != sock_def_readable ||
READ_ONCE(sk->sk_peek_off) >= 0)
INDIRECT_CALL_1(sk->sk_data_ready,
sock_def_readable, sk);
else
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
}
busylock_release(busy);
return 0;

View File

@ -184,7 +184,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}

View File

@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}

View File

@ -9276,7 +9276,7 @@ void sctp_data_ready(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}

View File

@ -42,10 +42,10 @@ static void smc_rx_wake_up(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_HUP);
rcu_read_unlock();
}

View File

@ -546,7 +546,7 @@ static void unix_write_space(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait,
EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}