1

mlxsw: pci: Use page pool for Rx buffers allocation

As part of driver init, all Rx queues are filled with buffers for
hardware usage. Later, when a packet is received, a new buffer should be
allocated to be used by hardware instead of the received buffer.
Packet's processing time includes allocation time, which can be improved
using page pool.

Using page pool, DMA mapping is done only for first allocation of buffers.
As subsequent buffers allocation avoid DMA mapping, it results in
performance improvement. The purpose of page pool is to allocate pages fast
from cache without locking. This lockless guarantee naturally comes from
running under a NAPI.

Use page pool to allocate the data buffer only, so hardware will use it to
fill the packet. At completion time, attach the data buffer (now filled
with packet payload) to new SKB which is allocated around the received
buffer. SKB building at completion time prevents cache miss for each
packet, as now the SKB is allocated right before packets will be handled by
networking stack.

Page pool for each Rx queue enhances Rx side performance by reclaiming
buffers back to each queue specific pool. This change significantly
improves driver performance, CPU can handle about 345% of the packets per
second it previously handled.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/1cf788a8f43c70aae6d526018ef77becb27ad6d3.1718709196.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Amit Cohen 2024-06-18 13:34:43 +02:00 committed by Jakub Kicinski
parent 5642c6a086
commit b5b60bb491

View File

@ -62,6 +62,7 @@ struct mlxsw_pci_mem_item {
};
struct mlxsw_pci_queue_elem_info {
struct page *page;
char *elem; /* pointer to actual dma mapped element mem chunk */
union {
struct {
@ -346,6 +347,19 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
(MLXSW_PCI_SKB_HEADROOM + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
static void
mlxsw_pci_wqe_rx_frag_set(struct mlxsw_pci *mlxsw_pci, struct page *page,
char *wqe, int index, size_t frag_len)
{
dma_addr_t mapaddr;
mapaddr = page_pool_get_dma_addr(page);
mapaddr += MLXSW_PCI_SKB_HEADROOM;
mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
}
static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
int index, char *frag_data, size_t frag_len,
int direction)
@ -375,43 +389,46 @@ static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction);
}
static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue_elem_info *elem_info,
gfp_t gfp)
static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *page,
u16 byte_count)
{
size_t buf_len = MLXSW_PORT_MAX_MTU;
char *wqe = elem_info->elem;
void *data = page_address(page);
unsigned int allocated_size;
struct sk_buff *skb;
int err;
skb = __netdev_alloc_skb_ip_align(NULL, buf_len, gfp);
if (!skb)
return -ENOMEM;
allocated_size = page_size(page);
skb = napi_build_skb(data, allocated_size);
if (unlikely(!skb))
return ERR_PTR(-ENOMEM);
err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
buf_len, DMA_FROM_DEVICE);
if (err)
goto err_frag_map;
elem_info->u.rdq.skb = skb;
return 0;
err_frag_map:
dev_kfree_skb_any(skb);
return err;
skb_reserve(skb, MLXSW_PCI_SKB_HEADROOM);
skb_put(skb, byte_count);
return skb;
}
static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue_elem_info *elem_info)
static int mlxsw_pci_rdq_page_alloc(struct mlxsw_pci_queue *q,
struct mlxsw_pci_queue_elem_info *elem_info)
{
struct sk_buff *skb;
char *wqe;
struct mlxsw_pci_queue *cq = q->u.rdq.cq;
size_t buf_len = MLXSW_PORT_MAX_MTU;
char *wqe = elem_info->elem;
struct page *page;
skb = elem_info->u.rdq.skb;
wqe = elem_info->elem;
page = page_pool_dev_alloc_pages(cq->u.cq.page_pool);
if (unlikely(!page))
return -ENOMEM;
mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
mlxsw_pci_wqe_rx_frag_set(q->pci, page, wqe, 0, buf_len);
elem_info->page = page;
return 0;
}
static void mlxsw_pci_rdq_page_free(struct mlxsw_pci_queue *q,
struct mlxsw_pci_queue_elem_info *elem_info)
{
struct mlxsw_pci_queue *cq = q->u.rdq.cq;
page_pool_put_page(cq->u.cq.page_pool, elem_info->page, -1, false);
}
static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
@ -452,7 +469,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
for (i = 0; i < q->count; i++) {
elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
BUG_ON(!elem_info);
err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_KERNEL);
err = mlxsw_pci_rdq_page_alloc(q, elem_info);
if (err)
goto rollback;
/* Everything is set up, ring doorbell to pass elem to HW */
@ -465,7 +482,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
rollback:
for (i--; i >= 0; i--) {
elem_info = mlxsw_pci_queue_elem_info_get(q, i);
mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
mlxsw_pci_rdq_page_free(q, elem_info);
}
q->u.rdq.cq = NULL;
cq->u.cq.dq = NULL;
@ -483,7 +500,7 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
for (i = 0; i < q->count; i++) {
elem_info = mlxsw_pci_queue_elem_info_get(q, i);
mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
mlxsw_pci_rdq_page_free(q, elem_info);
}
}
@ -618,26 +635,38 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
{
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
struct mlxsw_pci_queue *cq = q->u.rdq.cq;
struct mlxsw_rx_info rx_info = {};
char wqe[MLXSW_PCI_WQE_SIZE];
struct sk_buff *skb;
struct page *page;
u16 byte_count;
int err;
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
skb = elem_info->u.rdq.skb;
memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);
if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_ATOMIC);
byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
byte_count -= ETH_FCS_LEN;
page = elem_info->page;
err = mlxsw_pci_rdq_page_alloc(q, elem_info);
if (err) {
dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
dev_err_ratelimited(&pdev->dev, "Failed to alloc page\n");
goto out;
}
mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
skb = mlxsw_pci_rdq_build_skb(page, byte_count);
if (IS_ERR(skb)) {
dev_err_ratelimited(&pdev->dev, "Failed to build skb for RDQ\n");
page_pool_recycle_direct(cq->u.cq.page_pool, page);
goto out;
}
skb_mark_for_recycle(skb);
if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
rx_info.is_lag = true;
@ -670,10 +699,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);
byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
byte_count -= ETH_FCS_LEN;
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
out: