16 hotfixes. All except one are for MM. 10 of these are cc:stable and
the others pertain to post-6.10 issues. As usual with these merges, singletons and doubletons all over the place, no identifiable-by-me theme. Please see the lovingly curated changelogs to get the skinny. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZsFf8wAKCRDdBJ7gKXxA jvEUAP97y/sqKD8rQNc0R8fRGSPNPamwyok8RHwohb0JEHovlAD9HsQ9Ad57EpqR wBexMxJRFc7Dt73Tu6IkLQ1iNGqABAc= =8KNp -----END PGP SIGNATURE----- Merge tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull misc fixes from Andrew Morton: "16 hotfixes. All except one are for MM. 10 of these are cc:stable and the others pertain to post-6.10 issues. As usual with these merges, singletons and doubletons all over the place, no identifiable-by-me theme. Please see the lovingly curated changelogs to get the skinny" * tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: mm/migrate: fix deadlock in migrate_pages_batch() on large folios alloc_tag: mark pages reserved during CMA activation as not tagged alloc_tag: introduce clear_page_tag_ref() helper function crash: fix riscv64 crash memory reserve dead loop selftests: memfd_secret: don't build memfd_secret test on unsupported arches mm: fix endless reclaim on machines with unaccepted memory selftests/mm: compaction_test: fix off by one in check_compaction() mm/numa: no task_numa_fault() call if PMD is changed mm/numa: no task_numa_fault() call if PTE is changed mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0 mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu mm: don't account memmap per-node mm: add system wide stats items category mm: don't account memmap on failure mm/hugetlb: fix hugetlb vs. core-mm PT locking mseal: fix is_madv_discard()
This commit is contained in:
commit
c3f2d783a4
@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason)
|
||||
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
|
||||
struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
if (huge_page_size(h) == PMD_SIZE)
|
||||
const unsigned long size = huge_page_size(h);
|
||||
|
||||
VM_WARN_ON(size == PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* hugetlb must use the exact same PT locks as core-mm page table
|
||||
* walkers would. When modifying a PTE table, hugetlb must take the
|
||||
* PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
|
||||
* PT lock etc.
|
||||
*
|
||||
* The expectation is that any hugetlb folio smaller than a PMD is
|
||||
* always mapped into a single PTE table and that any hugetlb folio
|
||||
* smaller than a PUD (but at least as big as a PMD) is always mapped
|
||||
* into a single PMD table.
|
||||
*
|
||||
* If that does not hold for an architecture, then that architecture
|
||||
* must disable split PT locks such that all *_lockptr() functions
|
||||
* will give us the same result: the per-MM PT lock.
|
||||
*
|
||||
* Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
|
||||
* PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
|
||||
* and core-mm would use pmd_lockptr(). However, in such configurations
|
||||
* split PMD locks are disabled -- they don't make sense on a single
|
||||
* PGDIR page table -- and the end result is the same.
|
||||
*/
|
||||
if (size >= PUD_SIZE)
|
||||
return pud_lockptr(mm, (pud_t *) pte);
|
||||
else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
|
||||
return pmd_lockptr(mm, (pmd_t *) pte);
|
||||
VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
|
||||
return &mm->page_table_lock;
|
||||
/* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
|
||||
return ptep_lockptr(mm, pte);
|
||||
}
|
||||
|
||||
#ifndef hugepages_supported
|
||||
|
@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
|
||||
return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
|
||||
}
|
||||
|
||||
static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
|
||||
BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
|
||||
return ptlock_ptr(virt_to_ptdesc(pte));
|
||||
}
|
||||
|
||||
static inline bool ptlock_init(struct ptdesc *ptdesc)
|
||||
{
|
||||
/*
|
||||
@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
|
||||
{
|
||||
return &mm->page_table_lock;
|
||||
}
|
||||
static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
return &mm->page_table_lock;
|
||||
}
|
||||
static inline void ptlock_cache_init(void) {}
|
||||
static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
|
||||
static inline void ptlock_free(struct ptdesc *ptdesc) {}
|
||||
|
@ -220,8 +220,6 @@ enum node_stat_item {
|
||||
PGDEMOTE_KSWAPD,
|
||||
PGDEMOTE_DIRECT,
|
||||
PGDEMOTE_KHUGEPAGED,
|
||||
NR_MEMMAP, /* page metadata allocated through buddy allocator */
|
||||
NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */
|
||||
NR_VM_NODE_STAT_ITEMS
|
||||
};
|
||||
|
||||
|
@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref)
|
||||
page_ext_put(page_ext_from_codetag_ref(ref));
|
||||
}
|
||||
|
||||
static inline void clear_page_tag_ref(struct page *page)
|
||||
{
|
||||
if (mem_alloc_profiling_enabled()) {
|
||||
union codetag_ref *ref = get_page_tag_ref(page);
|
||||
|
||||
if (ref) {
|
||||
set_codetag_empty(ref);
|
||||
put_page_tag_ref(ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
|
||||
unsigned int nr)
|
||||
{
|
||||
@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
|
||||
|
||||
static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
|
||||
static inline void put_page_tag_ref(union codetag_ref *ref) {}
|
||||
static inline void clear_page_tag_ref(struct page *page) {}
|
||||
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
|
||||
unsigned int nr) {}
|
||||
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
|
||||
|
@ -34,10 +34,13 @@ struct reclaim_stat {
|
||||
unsigned nr_lazyfree_fail;
|
||||
};
|
||||
|
||||
enum writeback_stat_item {
|
||||
/* Stat data for system wide items */
|
||||
enum vm_stat_item {
|
||||
NR_DIRTY_THRESHOLD,
|
||||
NR_DIRTY_BG_THRESHOLD,
|
||||
NR_VM_WRITEBACK_STAT_ITEMS,
|
||||
NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */
|
||||
NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */
|
||||
NR_VM_STAT_ITEMS,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_VM_EVENT_COUNTERS
|
||||
@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru)
|
||||
return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
|
||||
}
|
||||
|
||||
static inline const char *writeback_stat_name(enum writeback_stat_item item)
|
||||
{
|
||||
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
|
||||
NR_VM_NUMA_EVENT_ITEMS +
|
||||
NR_VM_NODE_STAT_ITEMS +
|
||||
item];
|
||||
}
|
||||
|
||||
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
|
||||
static inline const char *vm_event_name(enum vm_event_item item)
|
||||
{
|
||||
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
|
||||
NR_VM_NUMA_EVENT_ITEMS +
|
||||
NR_VM_NODE_STAT_ITEMS +
|
||||
NR_VM_WRITEBACK_STAT_ITEMS +
|
||||
NR_VM_STAT_ITEMS +
|
||||
item];
|
||||
}
|
||||
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
|
||||
@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
|
||||
lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
|
||||
}
|
||||
|
||||
void __meminit mod_node_early_perpage_metadata(int nid, long delta);
|
||||
void __meminit store_early_perpage_metadata(void);
|
||||
|
||||
void memmap_boot_pages_add(long delta);
|
||||
void memmap_pages_add(long delta);
|
||||
#endif /* _LINUX_VMSTAT_H */
|
||||
|
@ -423,7 +423,8 @@ retry:
|
||||
if (high && search_end == CRASH_ADDR_HIGH_MAX) {
|
||||
search_end = CRASH_ADDR_LOW_MAX;
|
||||
search_base = 0;
|
||||
goto retry;
|
||||
if (search_end != CRASH_ADDR_HIGH_MAX)
|
||||
goto retry;
|
||||
}
|
||||
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
|
||||
crash_size);
|
||||
|
@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
|
||||
spin_unlock(vmf->ptl);
|
||||
goto out;
|
||||
return 0;
|
||||
}
|
||||
|
||||
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
|
||||
@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
|
||||
if (!migrate_misplaced_folio(folio, vma, target_nid)) {
|
||||
flags |= TNF_MIGRATED;
|
||||
nid = target_nid;
|
||||
} else {
|
||||
flags |= TNF_MIGRATE_FAIL;
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
|
||||
spin_unlock(vmf->ptl);
|
||||
goto out;
|
||||
}
|
||||
goto out_map;
|
||||
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
out:
|
||||
if (nid != NUMA_NO_NODE)
|
||||
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
|
||||
|
||||
return 0;
|
||||
|
||||
flags |= TNF_MIGRATE_FAIL;
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
|
||||
spin_unlock(vmf->ptl);
|
||||
return 0;
|
||||
}
|
||||
out_map:
|
||||
/* Restore the PMD */
|
||||
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
|
||||
@ -1753,7 +1747,10 @@ out_map:
|
||||
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
|
||||
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
|
||||
spin_unlock(vmf->ptl);
|
||||
goto out;
|
||||
|
||||
if (nid != NUMA_NO_NODE)
|
||||
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
|
||||
static inline void free_vmemmap_page(struct page *page)
|
||||
{
|
||||
if (PageReserved(page)) {
|
||||
memmap_boot_pages_add(-1);
|
||||
free_bootmem_page(page);
|
||||
mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1);
|
||||
} else {
|
||||
memmap_pages_add(-1);
|
||||
__free_page(page);
|
||||
mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
|
||||
copy_page(page_to_virt(walk.reuse_page),
|
||||
(void *)walk.reuse_addr);
|
||||
list_add(&walk.reuse_page->lru, vmemmap_pages);
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1);
|
||||
memmap_pages_add(1);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
page = alloc_pages_node(nid, gfp_mask, 0);
|
||||
if (!page) {
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i);
|
||||
if (!page)
|
||||
goto out;
|
||||
}
|
||||
list_add(&page->lru, list);
|
||||
}
|
||||
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages);
|
||||
memmap_pages_add(nr_pages);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
|
@ -2417,7 +2417,7 @@ struct memory_failure_entry {
|
||||
struct memory_failure_cpu {
|
||||
DECLARE_KFIFO(fifo, struct memory_failure_entry,
|
||||
MEMORY_FAILURE_FIFO_SIZE);
|
||||
spinlock_t lock;
|
||||
raw_spinlock_t lock;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags)
|
||||
{
|
||||
struct memory_failure_cpu *mf_cpu;
|
||||
unsigned long proc_flags;
|
||||
bool buffer_overflow;
|
||||
struct memory_failure_entry entry = {
|
||||
.pfn = pfn,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
mf_cpu = &get_cpu_var(memory_failure_cpu);
|
||||
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
|
||||
if (kfifo_put(&mf_cpu->fifo, entry))
|
||||
raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
|
||||
buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
|
||||
if (!buffer_overflow)
|
||||
schedule_work_on(smp_processor_id(), &mf_cpu->work);
|
||||
else
|
||||
raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
|
||||
put_cpu_var(memory_failure_cpu);
|
||||
if (buffer_overflow)
|
||||
pr_err("buffer overflow when queuing memory failure at %#lx\n",
|
||||
pfn);
|
||||
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
|
||||
put_cpu_var(memory_failure_cpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(memory_failure_queue);
|
||||
|
||||
@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work)
|
||||
|
||||
mf_cpu = container_of(work, struct memory_failure_cpu, work);
|
||||
for (;;) {
|
||||
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
|
||||
raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
|
||||
gotten = kfifo_get(&mf_cpu->fifo, &entry);
|
||||
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
|
||||
raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
|
||||
if (!gotten)
|
||||
break;
|
||||
if (entry.flags & MF_SOFT_OFFLINE)
|
||||
@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void)
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
|
||||
spin_lock_init(&mf_cpu->lock);
|
||||
raw_spin_lock_init(&mf_cpu->lock);
|
||||
INIT_KFIFO(mf_cpu->fifo);
|
||||
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
|
||||
}
|
||||
|
33
mm/memory.c
33
mm/memory.c
@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
|
||||
|
||||
if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
goto out;
|
||||
return 0;
|
||||
}
|
||||
|
||||
pte = pte_modify(old_pte, vma->vm_page_prot);
|
||||
@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
|
||||
if (!migrate_misplaced_folio(folio, vma, target_nid)) {
|
||||
nid = target_nid;
|
||||
flags |= TNF_MIGRATED;
|
||||
} else {
|
||||
flags |= TNF_MIGRATE_FAIL;
|
||||
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
|
||||
vmf->address, &vmf->ptl);
|
||||
if (unlikely(!vmf->pte))
|
||||
goto out;
|
||||
if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
goto out;
|
||||
}
|
||||
goto out_map;
|
||||
task_numa_fault(last_cpupid, nid, nr_pages, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
out:
|
||||
if (nid != NUMA_NO_NODE)
|
||||
task_numa_fault(last_cpupid, nid, nr_pages, flags);
|
||||
return 0;
|
||||
flags |= TNF_MIGRATE_FAIL;
|
||||
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
|
||||
vmf->address, &vmf->ptl);
|
||||
if (unlikely(!vmf->pte))
|
||||
return 0;
|
||||
if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
return 0;
|
||||
}
|
||||
out_map:
|
||||
/*
|
||||
* Make it present again, depending on how arch implements
|
||||
@ -5387,7 +5383,10 @@ out_map:
|
||||
numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
|
||||
writable);
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
goto out;
|
||||
|
||||
if (nid != NUMA_NO_NODE)
|
||||
task_numa_fault(last_cpupid, nid, nr_pages, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
|
||||
|
16
mm/migrate.c
16
mm/migrate.c
@ -1479,11 +1479,17 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
|
||||
static inline int try_split_folio(struct folio *folio, struct list_head *split_folios,
|
||||
enum migrate_mode mode)
|
||||
{
|
||||
int rc;
|
||||
|
||||
folio_lock(folio);
|
||||
if (mode == MIGRATE_ASYNC) {
|
||||
if (!folio_trylock(folio))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
folio_lock(folio);
|
||||
}
|
||||
rc = split_folio_to_list(folio, split_folios);
|
||||
folio_unlock(folio);
|
||||
if (!rc)
|
||||
@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from,
|
||||
*/
|
||||
if (nr_pages > 2 &&
|
||||
!list_empty(&folio->_deferred_list)) {
|
||||
if (try_split_folio(folio, split_folios) == 0) {
|
||||
if (!try_split_folio(folio, split_folios, mode)) {
|
||||
nr_failed++;
|
||||
stats->nr_thp_failed += is_thp;
|
||||
stats->nr_thp_split += is_thp;
|
||||
@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from,
|
||||
if (!thp_migration_supported() && is_thp) {
|
||||
nr_failed++;
|
||||
stats->nr_thp_failed++;
|
||||
if (!try_split_folio(folio, split_folios)) {
|
||||
if (!try_split_folio(folio, split_folios, mode)) {
|
||||
stats->nr_thp_split++;
|
||||
stats->nr_split++;
|
||||
continue;
|
||||
@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from,
|
||||
stats->nr_thp_failed += is_thp;
|
||||
/* Large folio NUMA faulting doesn't split to retry. */
|
||||
if (is_large && !nosplit) {
|
||||
int ret = try_split_folio(folio, split_folios);
|
||||
int ret = try_split_folio(folio, split_folios, mode);
|
||||
|
||||
if (!ret) {
|
||||
stats->nr_thp_split += is_thp;
|
||||
|
15
mm/mm_init.c
15
mm/mm_init.c
@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
|
||||
panic("Failed to allocate %ld bytes for node %d memory map\n",
|
||||
size, pgdat->node_id);
|
||||
pgdat->node_mem_map = map + offset;
|
||||
mod_node_early_perpage_metadata(pgdat->node_id,
|
||||
DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
|
||||
__func__, pgdat->node_id, (unsigned long)pgdat,
|
||||
(unsigned long)pgdat->node_mem_map);
|
||||
@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page)
|
||||
|
||||
set_pageblock_migratetype(page, MIGRATE_CMA);
|
||||
set_page_refcounted(page);
|
||||
/* pages were reserved and not allocated */
|
||||
clear_page_tag_ref(page);
|
||||
__free_pages(page, pageblock_order);
|
||||
|
||||
adjust_managed_page_count(page, pageblock_nr_pages);
|
||||
@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
|
||||
}
|
||||
|
||||
/* pages were reserved and not allocated */
|
||||
if (mem_alloc_profiling_enabled()) {
|
||||
union codetag_ref *ref = get_page_tag_ref(page);
|
||||
|
||||
if (ref) {
|
||||
set_codetag_empty(ref);
|
||||
put_page_tag_ref(ref);
|
||||
}
|
||||
}
|
||||
|
||||
clear_page_tag_ref(page);
|
||||
__free_pages_core(page, order, MEMINIT_EARLY);
|
||||
}
|
||||
|
||||
|
14
mm/mseal.c
14
mm/mseal.c
@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma)
|
||||
|
||||
static bool is_madv_discard(int behavior)
|
||||
{
|
||||
return behavior &
|
||||
(MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED |
|
||||
MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK);
|
||||
switch (behavior) {
|
||||
case MADV_FREE:
|
||||
case MADV_DONTNEED:
|
||||
case MADV_DONTNEED_LOCKED:
|
||||
case MADV_REMOVE:
|
||||
case MADV_DONTFORK:
|
||||
case MADV_WIPEONFORK:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_ro_anon(struct vm_area_struct *vma)
|
||||
|
@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
|
||||
|
||||
static bool page_contains_unaccepted(struct page *page, unsigned int order);
|
||||
static void accept_page(struct page *page, unsigned int order);
|
||||
static bool try_to_accept_memory(struct zone *zone, unsigned int order);
|
||||
static bool cond_accept_memory(struct zone *zone, unsigned int order);
|
||||
static inline bool has_unaccepted_memory(void);
|
||||
static bool __free_unaccepted(struct page *page);
|
||||
|
||||
@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
|
||||
if (!(alloc_flags & ALLOC_CMA))
|
||||
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
|
||||
#endif
|
||||
#ifdef CONFIG_UNACCEPTED_MEMORY
|
||||
unusable_free += zone_page_state(z, NR_UNACCEPTED);
|
||||
#endif
|
||||
|
||||
return unusable_free;
|
||||
}
|
||||
@ -3368,6 +3365,8 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
cond_accept_memory(zone, order);
|
||||
|
||||
/*
|
||||
* Detect whether the number of free pages is below high
|
||||
* watermark. If so, we will decrease pcp->high and free
|
||||
@ -3393,10 +3392,8 @@ check_alloc_wmark:
|
||||
gfp_mask)) {
|
||||
int ret;
|
||||
|
||||
if (has_unaccepted_memory()) {
|
||||
if (try_to_accept_memory(zone, order))
|
||||
goto try_this_zone;
|
||||
}
|
||||
if (cond_accept_memory(zone, order))
|
||||
goto try_this_zone;
|
||||
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
/*
|
||||
@ -3450,10 +3447,8 @@ try_this_zone:
|
||||
|
||||
return page;
|
||||
} else {
|
||||
if (has_unaccepted_memory()) {
|
||||
if (try_to_accept_memory(zone, order))
|
||||
goto try_this_zone;
|
||||
}
|
||||
if (cond_accept_memory(zone, order))
|
||||
goto try_this_zone;
|
||||
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
/* Try again if zone has deferred pages */
|
||||
@ -5755,7 +5750,6 @@ void __init setup_per_cpu_pageset(void)
|
||||
for_each_online_pgdat(pgdat)
|
||||
pgdat->per_cpu_nodestats =
|
||||
alloc_percpu(struct per_cpu_nodestat);
|
||||
store_early_perpage_metadata();
|
||||
}
|
||||
|
||||
__meminit void zone_pcp_init(struct zone *zone)
|
||||
@ -5821,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
|
||||
|
||||
void free_reserved_page(struct page *page)
|
||||
{
|
||||
if (mem_alloc_profiling_enabled()) {
|
||||
union codetag_ref *ref = get_page_tag_ref(page);
|
||||
|
||||
if (ref) {
|
||||
set_codetag_empty(ref);
|
||||
put_page_tag_ref(ref);
|
||||
}
|
||||
}
|
||||
clear_page_tag_ref(page);
|
||||
ClearPageReserved(page);
|
||||
init_page_count(page);
|
||||
__free_page(page);
|
||||
@ -6951,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
|
||||
struct page *page;
|
||||
bool last;
|
||||
|
||||
if (list_empty(&zone->unaccepted_pages))
|
||||
return false;
|
||||
|
||||
spin_lock_irqsave(&zone->lock, flags);
|
||||
page = list_first_entry_or_null(&zone->unaccepted_pages,
|
||||
struct page, lru);
|
||||
@ -6979,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool try_to_accept_memory(struct zone *zone, unsigned int order)
|
||||
static bool cond_accept_memory(struct zone *zone, unsigned int order)
|
||||
{
|
||||
long to_accept;
|
||||
int ret = false;
|
||||
bool ret = false;
|
||||
|
||||
if (!has_unaccepted_memory())
|
||||
return false;
|
||||
|
||||
if (list_empty(&zone->unaccepted_pages))
|
||||
return false;
|
||||
|
||||
/* How much to accept to get to high watermark? */
|
||||
to_accept = high_wmark_pages(zone) -
|
||||
(zone_page_state(zone, NR_FREE_PAGES) -
|
||||
__zone_watermark_unusable_free(zone, order, 0));
|
||||
__zone_watermark_unusable_free(zone, order, 0) -
|
||||
zone_page_state(zone, NR_UNACCEPTED));
|
||||
|
||||
/* Accept at least one page */
|
||||
do {
|
||||
while (to_accept > 0) {
|
||||
if (!try_to_accept_memory_one(zone))
|
||||
break;
|
||||
ret = true;
|
||||
to_accept -= MAX_ORDER_NR_PAGES;
|
||||
} while (to_accept > 0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -7038,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order)
|
||||
{
|
||||
}
|
||||
|
||||
static bool try_to_accept_memory(struct zone *zone, unsigned int order)
|
||||
static bool cond_accept_memory(struct zone *zone, unsigned int order)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid)
|
||||
return -ENOMEM;
|
||||
NODE_DATA(nid)->node_page_ext = base;
|
||||
total_usage += table_size;
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT,
|
||||
DIV_ROUND_UP(table_size, PAGE_SIZE));
|
||||
memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid)
|
||||
else
|
||||
addr = vzalloc_node(size, nid);
|
||||
|
||||
if (addr) {
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP,
|
||||
DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
}
|
||||
if (addr)
|
||||
memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
|
||||
return addr;
|
||||
}
|
||||
@ -323,25 +320,18 @@ static void free_page_ext(void *addr)
|
||||
{
|
||||
size_t table_size;
|
||||
struct page *page;
|
||||
struct pglist_data *pgdat;
|
||||
|
||||
table_size = page_ext_size * PAGES_PER_SECTION;
|
||||
memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));
|
||||
|
||||
if (is_vmalloc_addr(addr)) {
|
||||
page = vmalloc_to_page(addr);
|
||||
pgdat = page_pgdat(page);
|
||||
vfree(addr);
|
||||
} else {
|
||||
page = virt_to_page(addr);
|
||||
pgdat = page_pgdat(page);
|
||||
BUG_ON(PageReserved(page));
|
||||
kmemleak_free(addr);
|
||||
free_pages_exact(addr, table_size);
|
||||
}
|
||||
|
||||
mod_node_page_state(pgdat, NR_MEMMAP,
|
||||
-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));
|
||||
|
||||
}
|
||||
|
||||
static void __free_page_ext(unsigned long pfn)
|
||||
|
@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
|
||||
if (system_state == SYSTEM_BOOTING) {
|
||||
mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start,
|
||||
PAGE_SIZE));
|
||||
} else {
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP,
|
||||
DIV_ROUND_UP(end - start, PAGE_SIZE));
|
||||
}
|
||||
if (system_state == SYSTEM_BOOTING)
|
||||
memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
|
||||
else
|
||||
memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
|
||||
|
||||
return pfn_to_page(pfn);
|
||||
}
|
||||
|
@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
|
||||
sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
|
||||
sparsemap_buf_end = sparsemap_buf + size;
|
||||
#ifndef CONFIG_SPARSEMEM_VMEMMAP
|
||||
mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
|
||||
unsigned long start = (unsigned long) pfn_to_page(pfn);
|
||||
unsigned long end = start + nr_pages * sizeof(struct page);
|
||||
|
||||
mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP,
|
||||
-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
|
||||
memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
|
||||
vmemmap_free(start, end, altmap);
|
||||
}
|
||||
static void free_map_bootmem(struct page *memmap)
|
||||
|
11
mm/vmalloc.c
11
mm/vmalloc.c
@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
|
||||
page = alloc_pages_noprof(alloc_gfp, order);
|
||||
else
|
||||
page = alloc_pages_node_noprof(nid, alloc_gfp, order);
|
||||
if (unlikely(!page)) {
|
||||
if (!nofail)
|
||||
break;
|
||||
|
||||
/* fall back to the zero order allocations */
|
||||
alloc_gfp |= __GFP_NOFAIL;
|
||||
order = 0;
|
||||
continue;
|
||||
}
|
||||
if (unlikely(!page))
|
||||
break;
|
||||
|
||||
/*
|
||||
* Higher order allocations must be able to be treated as
|
||||
|
52
mm/vmstat.c
52
mm/vmstat.c
@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Count number of pages "struct page" and "struct page_ext" consume.
|
||||
* nr_memmap_boot_pages: # of pages allocated by boot allocator
|
||||
* nr_memmap_pages: # of pages that were allocated by buddy allocator
|
||||
*/
|
||||
static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0);
|
||||
static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0);
|
||||
|
||||
void memmap_boot_pages_add(long delta)
|
||||
{
|
||||
atomic_long_add(delta, &nr_memmap_boot_pages);
|
||||
}
|
||||
|
||||
void memmap_pages_add(long delta)
|
||||
{
|
||||
atomic_long_add(delta, &nr_memmap_pages);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
|
||||
struct contig_page_info {
|
||||
@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = {
|
||||
"pgdemote_kswapd",
|
||||
"pgdemote_direct",
|
||||
"pgdemote_khugepaged",
|
||||
"nr_memmap",
|
||||
"nr_memmap_boot",
|
||||
/* enum writeback_stat_item counters */
|
||||
/* system-wide enum vm_stat_item counters */
|
||||
"nr_dirty_threshold",
|
||||
"nr_dirty_background_threshold",
|
||||
"nr_memmap_pages",
|
||||
"nr_memmap_boot_pages",
|
||||
|
||||
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
|
||||
/* enum vm_event_item counters */
|
||||
@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = {
|
||||
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
|
||||
NR_VM_NUMA_EVENT_ITEMS + \
|
||||
NR_VM_NODE_STAT_ITEMS + \
|
||||
NR_VM_WRITEBACK_STAT_ITEMS + \
|
||||
NR_VM_STAT_ITEMS + \
|
||||
(IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
|
||||
NR_VM_EVENT_ITEMS : 0))
|
||||
|
||||
@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
|
||||
|
||||
global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
|
||||
v + NR_DIRTY_THRESHOLD);
|
||||
v += NR_VM_WRITEBACK_STAT_ITEMS;
|
||||
v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages);
|
||||
v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages);
|
||||
v += NR_VM_STAT_ITEMS;
|
||||
|
||||
#ifdef CONFIG_VM_EVENT_COUNTERS
|
||||
all_vm_events(v);
|
||||
@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void)
|
||||
module_init(extfrag_debug_init);
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Page metadata size (struct page and page_ext) in pages
|
||||
*/
|
||||
static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata;
|
||||
|
||||
void __meminit mod_node_early_perpage_metadata(int nid, long delta)
|
||||
{
|
||||
early_perpage_metadata[nid] += delta;
|
||||
}
|
||||
|
||||
void __meminit store_early_perpage_metadata(void)
|
||||
{
|
||||
int nid;
|
||||
struct pglist_data *pgdat;
|
||||
|
||||
for_each_online_pgdat(pgdat) {
|
||||
nid = pgdat->node_id;
|
||||
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT,
|
||||
early_perpage_metadata[nid]);
|
||||
}
|
||||
}
|
||||
|
@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate
|
||||
TEST_GEN_FILES += map_fixed_noreplace
|
||||
TEST_GEN_FILES += map_hugetlb
|
||||
TEST_GEN_FILES += map_populate
|
||||
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
|
||||
TEST_GEN_FILES += memfd_secret
|
||||
endif
|
||||
TEST_GEN_FILES += migration
|
||||
TEST_GEN_FILES += mkdirty
|
||||
TEST_GEN_FILES += mlock-random-test
|
||||
|
@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
|
||||
int fd, ret = -1;
|
||||
int compaction_index = 0;
|
||||
char nr_hugepages[20] = {0};
|
||||
char init_nr_hugepages[20] = {0};
|
||||
char init_nr_hugepages[24] = {0};
|
||||
|
||||
sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages);
|
||||
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
|
||||
"%lu", initial_nr_hugepages);
|
||||
|
||||
/* We want to test with 80% of available memory. Else, OOM killer comes
|
||||
in to play */
|
||||
|
@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
|
||||
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
|
||||
CATEGORY="madv_populate" run_test ./madv_populate
|
||||
|
||||
if [ -x ./memfd_secret ]
|
||||
then
|
||||
(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
|
||||
CATEGORY="memfd_secret" run_test ./memfd_secret
|
||||
fi
|
||||
|
||||
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
|
||||
CATEGORY="ksm" run_test ./ksm_tests -H -s 100
|
||||
|
Loading…
Reference in New Issue
Block a user