91cdcd8d62
Profiling the munmap() of a zswapped memory region shows 60% of the total cycles currently going into updating the zswap_pool_total_size. There are three consumers of this counter: - store, to enforce the globally configured pool limit - meminfo & debugfs, to report the size to the user - shrink, to determine the batch size for each cycle Instead of aggregating everytime an entry enters or exits the zswap pool, aggregate the value from the zpools on-demand: - Stores aggregate the counter anyway upon success. Aggregating to check the limit instead is the same amount of work. - Meminfo & debugfs might benefit somewhat from a pre-aggregated counter, but aren't exactly hotpaths. - Shrinking can aggregate once for every cycle instead of doing it for every freed entry. As the shrinker might work on tens or hundreds of objects per scan cycle, this is a large reduction in aggregations. The paths that benefit dramatically are swapin, swapoff, and unmaps. There could be millions of pages being processed until somebody asks for the pool size again. This eliminates the pool size updates from those paths entirely. Top profile entries for a 24G range munmap(), before: 38.54% zswap-unmap [kernel.kallsyms] [k] zs_zpool_total_size 12.51% zswap-unmap [kernel.kallsyms] [k] zpool_get_total_size 9.10% zswap-unmap [kernel.kallsyms] [k] zswap_update_total_size 2.95% zswap-unmap [kernel.kallsyms] [k] obj_cgroup_uncharge_zswap 2.88% zswap-unmap [kernel.kallsyms] [k] __slab_free 2.86% zswap-unmap [kernel.kallsyms] [k] xas_store and after: 7.70% zswap-unmap [kernel.kallsyms] [k] __slab_free 7.16% zswap-unmap [kernel.kallsyms] [k] obj_cgroup_uncharge_zswap 6.74% zswap-unmap [kernel.kallsyms] [k] xas_store It was also briefly considered to move to a single atomic in zswap that is updated by the backends, since zswap only cares about the sum of all pools anyway. However, zram directly needs per-pool information out of zsmalloc. To keep the backend from having to update two atomics every time, I opted for the lazy aggregation instead for now. Link: https://lkml.kernel.org/r/20240312153901.3441-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Yosry Ahmed <yosryahmed@google.com> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> Reviewed-by: Nhat Pham <nphamcs@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
182 lines
5.7 KiB
C
182 lines
5.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/fs.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/mmzone.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/vmstat.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/vmalloc.h>
|
|
#ifdef CONFIG_CMA
|
|
#include <linux/cma.h>
|
|
#endif
|
|
#include <linux/zswap.h>
|
|
#include <asm/page.h>
|
|
#include "internal.h"
|
|
|
|
void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
|
|
{
|
|
}
|
|
|
|
static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
|
|
{
|
|
seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
|
|
seq_write(m, " kB\n", 4);
|
|
}
|
|
|
|
static int meminfo_proc_show(struct seq_file *m, void *v)
|
|
{
|
|
struct sysinfo i;
|
|
unsigned long committed;
|
|
long cached;
|
|
long available;
|
|
unsigned long pages[NR_LRU_LISTS];
|
|
unsigned long sreclaimable, sunreclaim;
|
|
int lru;
|
|
|
|
si_meminfo(&i);
|
|
si_swapinfo(&i);
|
|
committed = vm_memory_committed();
|
|
|
|
cached = global_node_page_state(NR_FILE_PAGES) -
|
|
total_swapcache_pages() - i.bufferram;
|
|
if (cached < 0)
|
|
cached = 0;
|
|
|
|
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
|
|
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
|
|
|
|
available = si_mem_available();
|
|
sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
|
|
sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);
|
|
|
|
show_val_kb(m, "MemTotal: ", i.totalram);
|
|
show_val_kb(m, "MemFree: ", i.freeram);
|
|
show_val_kb(m, "MemAvailable: ", available);
|
|
show_val_kb(m, "Buffers: ", i.bufferram);
|
|
show_val_kb(m, "Cached: ", cached);
|
|
show_val_kb(m, "SwapCached: ", total_swapcache_pages());
|
|
show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] +
|
|
pages[LRU_ACTIVE_FILE]);
|
|
show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] +
|
|
pages[LRU_INACTIVE_FILE]);
|
|
show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]);
|
|
show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]);
|
|
show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]);
|
|
show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
|
|
show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]);
|
|
show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK));
|
|
|
|
#ifdef CONFIG_HIGHMEM
|
|
show_val_kb(m, "HighTotal: ", i.totalhigh);
|
|
show_val_kb(m, "HighFree: ", i.freehigh);
|
|
show_val_kb(m, "LowTotal: ", i.totalram - i.totalhigh);
|
|
show_val_kb(m, "LowFree: ", i.freeram - i.freehigh);
|
|
#endif
|
|
|
|
#ifndef CONFIG_MMU
|
|
show_val_kb(m, "MmapCopy: ",
|
|
(unsigned long)atomic_long_read(&mmap_pages_allocated));
|
|
#endif
|
|
|
|
show_val_kb(m, "SwapTotal: ", i.totalswap);
|
|
show_val_kb(m, "SwapFree: ", i.freeswap);
|
|
#ifdef CONFIG_ZSWAP
|
|
show_val_kb(m, "Zswap: ", zswap_total_pages());
|
|
seq_printf(m, "Zswapped: %8lu kB\n",
|
|
(unsigned long)atomic_read(&zswap_stored_pages) <<
|
|
(PAGE_SHIFT - 10));
|
|
#endif
|
|
show_val_kb(m, "Dirty: ",
|
|
global_node_page_state(NR_FILE_DIRTY));
|
|
show_val_kb(m, "Writeback: ",
|
|
global_node_page_state(NR_WRITEBACK));
|
|
show_val_kb(m, "AnonPages: ",
|
|
global_node_page_state(NR_ANON_MAPPED));
|
|
show_val_kb(m, "Mapped: ",
|
|
global_node_page_state(NR_FILE_MAPPED));
|
|
show_val_kb(m, "Shmem: ", i.sharedram);
|
|
show_val_kb(m, "KReclaimable: ", sreclaimable +
|
|
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
|
|
show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
|
|
show_val_kb(m, "SReclaimable: ", sreclaimable);
|
|
show_val_kb(m, "SUnreclaim: ", sunreclaim);
|
|
seq_printf(m, "KernelStack: %8lu kB\n",
|
|
global_node_page_state(NR_KERNEL_STACK_KB));
|
|
#ifdef CONFIG_SHADOW_CALL_STACK
|
|
seq_printf(m, "ShadowCallStack:%8lu kB\n",
|
|
global_node_page_state(NR_KERNEL_SCS_KB));
|
|
#endif
|
|
show_val_kb(m, "PageTables: ",
|
|
global_node_page_state(NR_PAGETABLE));
|
|
show_val_kb(m, "SecPageTables: ",
|
|
global_node_page_state(NR_SECONDARY_PAGETABLE));
|
|
|
|
show_val_kb(m, "NFS_Unstable: ", 0);
|
|
show_val_kb(m, "Bounce: ",
|
|
global_zone_page_state(NR_BOUNCE));
|
|
show_val_kb(m, "WritebackTmp: ",
|
|
global_node_page_state(NR_WRITEBACK_TEMP));
|
|
show_val_kb(m, "CommitLimit: ", vm_commit_limit());
|
|
show_val_kb(m, "Committed_AS: ", committed);
|
|
seq_printf(m, "VmallocTotal: %8lu kB\n",
|
|
(unsigned long)VMALLOC_TOTAL >> 10);
|
|
show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages());
|
|
show_val_kb(m, "VmallocChunk: ", 0ul);
|
|
show_val_kb(m, "Percpu: ", pcpu_nr_pages());
|
|
|
|
memtest_report_meminfo(m);
|
|
|
|
#ifdef CONFIG_MEMORY_FAILURE
|
|
seq_printf(m, "HardwareCorrupted: %5lu kB\n",
|
|
atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10));
|
|
#endif
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
show_val_kb(m, "AnonHugePages: ",
|
|
global_node_page_state(NR_ANON_THPS));
|
|
show_val_kb(m, "ShmemHugePages: ",
|
|
global_node_page_state(NR_SHMEM_THPS));
|
|
show_val_kb(m, "ShmemPmdMapped: ",
|
|
global_node_page_state(NR_SHMEM_PMDMAPPED));
|
|
show_val_kb(m, "FileHugePages: ",
|
|
global_node_page_state(NR_FILE_THPS));
|
|
show_val_kb(m, "FilePmdMapped: ",
|
|
global_node_page_state(NR_FILE_PMDMAPPED));
|
|
#endif
|
|
|
|
#ifdef CONFIG_CMA
|
|
show_val_kb(m, "CmaTotal: ", totalcma_pages);
|
|
show_val_kb(m, "CmaFree: ",
|
|
global_zone_page_state(NR_FREE_CMA_PAGES));
|
|
#endif
|
|
|
|
#ifdef CONFIG_UNACCEPTED_MEMORY
|
|
show_val_kb(m, "Unaccepted: ",
|
|
global_zone_page_state(NR_UNACCEPTED));
|
|
#endif
|
|
|
|
hugetlb_report_meminfo(m);
|
|
|
|
arch_report_meminfo(m);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __init proc_meminfo_init(void)
|
|
{
|
|
struct proc_dir_entry *pde;
|
|
|
|
pde = proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
|
|
pde_make_permanent(pde);
|
|
return 0;
|
|
}
|
|
fs_initcall(proc_meminfo_init);
|