d49010adae
Dump the stats into netlink. More clever approaches like dumping the stats per-CPU for each CPU individually to see where the packets get consumed can be implemented in the future. A trimmed example from a real (but recently booted system): $ ./cli.py --no-schema --spec netlink/specs/netdev.yaml \ --dump page-pool-stats-get [{'info': {'id': 19, 'ifindex': 2}, 'alloc-empty': 48, 'alloc-fast': 3024, 'alloc-refill': 0, 'alloc-slow': 48, 'alloc-slow-high-order': 0, 'alloc-waive': 0, 'recycle-cache-full': 0, 'recycle-cached': 0, 'recycle-released-refcnt': 0, 'recycle-ring': 0, 'recycle-ring-full': 0}, {'info': {'id': 18, 'ifindex': 2}, 'alloc-empty': 66, 'alloc-fast': 11811, 'alloc-refill': 35, 'alloc-slow': 66, 'alloc-slow-high-order': 0, 'alloc-waive': 0, 'recycle-cache-full': 1145, 'recycle-cached': 6541, 'recycle-released-refcnt': 0, 'recycle-ring': 1275, 'recycle-ring-full': 0}, {'info': {'id': 17, 'ifindex': 2}, 'alloc-empty': 73, 'alloc-fast': 62099, 'alloc-refill': 413, ... Acked-by: Jesper Dangaard Brouer <hawk@kernel.org> Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
200 lines
6.6 KiB
ReStructuredText
200 lines
6.6 KiB
ReStructuredText
.. SPDX-License-Identifier: GPL-2.0
|
|
|
|
=============
|
|
Page Pool API
|
|
=============
|
|
|
|
.. kernel-doc:: include/net/page_pool/helpers.h
|
|
:doc: page_pool allocator
|
|
|
|
Architecture overview
|
|
=====================
|
|
|
|
.. code-block:: none
|
|
|
|
+------------------+
|
|
| Driver |
|
|
+------------------+
|
|
^
|
|
|
|
|
|
|
|
|
|
|
v
|
|
+--------------------------------------------+
|
|
| request memory |
|
|
+--------------------------------------------+
|
|
^ ^
|
|
| |
|
|
| Pool empty | Pool has entries
|
|
| |
|
|
v v
|
|
+-----------------------+ +------------------------+
|
|
| alloc (and map) pages | | get page from cache |
|
|
+-----------------------+ +------------------------+
|
|
^ ^
|
|
| |
|
|
| cache available | No entries, refill
|
|
| | from ptr-ring
|
|
| |
|
|
v v
|
|
+-----------------+ +------------------+
|
|
| Fast cache | | ptr-ring cache |
|
|
+-----------------+ +------------------+
|
|
|
|
Monitoring
|
|
==========
|
|
Information about page pools on the system can be accessed via the netdev
|
|
genetlink family (see Documentation/netlink/specs/netdev.yaml).
|
|
|
|
API interface
|
|
=============
|
|
The number of pools created **must** match the number of hardware queues
|
|
unless hardware restrictions make that impossible. This would otherwise beat the
|
|
purpose of page pool, which is to allocate pages fast from cache without locking.
|
|
This lockless guarantee naturally comes from running under a NAPI softirq.
|
|
The protection doesn't strictly have to be NAPI, any guarantee that allocating
|
|
a page will cause no race conditions is enough.
|
|
|
|
.. kernel-doc:: net/core/page_pool.c
|
|
:identifiers: page_pool_create
|
|
|
|
.. kernel-doc:: include/net/page_pool/types.h
|
|
:identifiers: struct page_pool_params
|
|
|
|
.. kernel-doc:: include/net/page_pool/helpers.h
|
|
:identifiers: page_pool_put_page page_pool_put_full_page
|
|
page_pool_recycle_direct page_pool_free_va
|
|
page_pool_dev_alloc_pages page_pool_dev_alloc_frag
|
|
page_pool_dev_alloc page_pool_dev_alloc_va
|
|
page_pool_get_dma_addr page_pool_get_dma_dir
|
|
|
|
.. kernel-doc:: net/core/page_pool.c
|
|
:identifiers: page_pool_put_page_bulk page_pool_get_stats
|
|
|
|
DMA sync
|
|
--------
|
|
Driver is always responsible for syncing the pages for the CPU.
|
|
Drivers may choose to take care of syncing for the device as well
|
|
or set the ``PP_FLAG_DMA_SYNC_DEV`` flag to request that pages
|
|
allocated from the page pool are already synced for the device.
|
|
|
|
If ``PP_FLAG_DMA_SYNC_DEV`` is set, the driver must inform the core what portion
|
|
of the buffer has to be synced. This allows the core to avoid syncing the entire
|
|
page when the driver knows that the device only accessed a portion of the page.
|
|
|
|
Most drivers will reserve headroom in front of the frame. This part
|
|
of the buffer is not touched by the device, so to avoid syncing
|
|
it drivers can set the ``offset`` field in struct page_pool_params
|
|
appropriately.
|
|
|
|
For pages recycled on the XDP xmit and skb paths the page pool will
|
|
use the ``max_len`` member of struct page_pool_params to decide how
|
|
much of the page needs to be synced (starting at ``offset``).
|
|
When directly freeing pages in the driver (page_pool_put_page())
|
|
the ``dma_sync_size`` argument specifies how much of the buffer needs
|
|
to be synced.
|
|
|
|
If in doubt set ``offset`` to 0, ``max_len`` to ``PAGE_SIZE`` and
|
|
pass -1 as ``dma_sync_size``. That combination of arguments is always
|
|
correct.
|
|
|
|
Note that the syncing parameters are for the entire page.
|
|
This is important to remember when using fragments (``PP_FLAG_PAGE_FRAG``),
|
|
where allocated buffers may be smaller than a full page.
|
|
Unless the driver author really understands page pool internals
|
|
it's recommended to always use ``offset = 0``, ``max_len = PAGE_SIZE``
|
|
with fragmented page pools.
|
|
|
|
Stats API and structures
|
|
------------------------
|
|
If the kernel is configured with ``CONFIG_PAGE_POOL_STATS=y``, the API
|
|
page_pool_get_stats() and structures described below are available.
|
|
It takes a pointer to a ``struct page_pool`` and a pointer to a struct
|
|
page_pool_stats allocated by the caller.
|
|
|
|
Older drivers expose page pool statistics via ethtool or debugfs.
|
|
The same statistics are accessible via the netlink netdev family
|
|
in a driver-independent fashion.
|
|
|
|
.. kernel-doc:: include/net/page_pool/types.h
|
|
:identifiers: struct page_pool_recycle_stats
|
|
struct page_pool_alloc_stats
|
|
struct page_pool_stats
|
|
|
|
Coding examples
|
|
===============
|
|
|
|
Registration
|
|
------------
|
|
|
|
.. code-block:: c
|
|
|
|
/* Page pool registration */
|
|
struct page_pool_params pp_params = { 0 };
|
|
struct xdp_rxq_info xdp_rxq;
|
|
int err;
|
|
|
|
pp_params.order = 0;
|
|
/* internal DMA mapping in page_pool */
|
|
pp_params.flags = PP_FLAG_DMA_MAP;
|
|
pp_params.pool_size = DESC_NUM;
|
|
pp_params.nid = NUMA_NO_NODE;
|
|
pp_params.dev = priv->dev;
|
|
pp_params.napi = napi; /* only if locking is tied to NAPI */
|
|
pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
|
|
page_pool = page_pool_create(&pp_params);
|
|
|
|
err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0);
|
|
if (err)
|
|
goto err_out;
|
|
|
|
err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool);
|
|
if (err)
|
|
goto err_out;
|
|
|
|
NAPI poller
|
|
-----------
|
|
|
|
|
|
.. code-block:: c
|
|
|
|
/* NAPI Rx poller */
|
|
enum dma_data_direction dma_dir;
|
|
|
|
dma_dir = page_pool_get_dma_dir(dring->page_pool);
|
|
while (done < budget) {
|
|
if (some error)
|
|
page_pool_recycle_direct(page_pool, page);
|
|
if (packet_is_xdp) {
|
|
if XDP_DROP:
|
|
page_pool_recycle_direct(page_pool, page);
|
|
} else (packet_is_skb) {
|
|
skb_mark_for_recycle(skb);
|
|
new_page = page_pool_dev_alloc_pages(page_pool);
|
|
}
|
|
}
|
|
|
|
Stats
|
|
-----
|
|
|
|
.. code-block:: c
|
|
|
|
#ifdef CONFIG_PAGE_POOL_STATS
|
|
/* retrieve stats */
|
|
struct page_pool_stats stats = { 0 };
|
|
if (page_pool_get_stats(page_pool, &stats)) {
|
|
/* perhaps the driver reports statistics with ethtool */
|
|
ethtool_print_allocation_stats(&stats.alloc_stats);
|
|
ethtool_print_recycle_stats(&stats.recycle_stats);
|
|
}
|
|
#endif
|
|
|
|
Driver unload
|
|
-------------
|
|
|
|
.. code-block:: c
|
|
|
|
/* Driver unload */
|
|
page_pool_put_full_page(page_pool, page, false);
|
|
xdp_rxq_info_unreg(&xdp_rxq);
|