// SPDX-License-Identifier: GPL-2.0
#include <linux/hugetlb.h>
#include <linux/err.h>

#ifdef CONFIG_RISCV_ISA_SVNAPOT
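/*
 * Read a NAPOT hugepage mapping: start from the first (base) PTE and fold
 * in the dirty and accessed bits of every member PTE, so the caller sees
 * the state of the mapping as a whole.
 */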
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        unsigned long pte_num;
        int i;
        pte_t orig_pte = ptep_get(ptep);

        if (!pte_present(orig_pte) || !pte_napot(orig_pte))
                return orig_pte;

        pte_num = napot_pte_num(napot_cont_order(orig_pte));

        for (i = 0; i < pte_num; i++, ptep++) {
                pte_t pte = ptep_get(ptep);

                if (pte_dirty(pte))
                        orig_pte = pte_mkdirty(orig_pte);

                if (pte_young(pte))
                        orig_pte = pte_mkyoung(orig_pte);
        }

        return orig_pte;
}
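/*
 * Allocate page-table entries for a hugepage of size @sz at @addr, walking
 * (and populating) the levels down to the one matching the hugepage size.
 * NAPOT sizes land on a regular PTE within a PMD-level page table.
 */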
pte_t *huge_pte_alloc(struct mm_struct *mm,
                      struct vm_area_struct *vma,
                      unsigned long addr,
                      unsigned long sz)
{
        unsigned long order;
        pte_t *pte = NULL;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        p4d = p4d_alloc(mm, pgd, addr);
        if (!p4d)
                return NULL;

        pud = pud_alloc(mm, p4d, addr);
        if (!pud)
                return NULL;

        if (sz == PUD_SIZE) {
                pte = (pte_t *)pud;
                goto out;
        }

        if (sz == PMD_SIZE) {
                if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
                        pte = huge_pmd_share(mm, vma, addr, pud);
                else
                        pte = (pte_t *)pmd_alloc(mm, pud, addr);
                goto out;
        }

        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return NULL;

        for_each_napot_order(order) {
                if (napot_cont_size(order) == sz) {
                        pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order));
                        break;
                }
        }

out:
        if (pte) {
                pte_t pteval = ptep_get_lockless(pte);

                WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval));
        }
        return pte;
}
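/*
 * Look up the page-table entry backing a hugepage of size @sz at @addr
 * without allocating; returns NULL if any intermediate level is missing.
 */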
pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr,
                       unsigned long sz)
{
        unsigned long order;
        pte_t *pte = NULL;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        if (!pgd_present(pgdp_get(pgd)))
                return NULL;

        p4d = p4d_offset(pgd, addr);
        if (!p4d_present(p4dp_get(p4d)))
                return NULL;

        pud = pud_offset(p4d, addr);
        if (sz == PUD_SIZE)
                /* must be pud huge, non-present or none */
                return (pte_t *)pud;

        if (!pud_present(pudp_get(pud)))
                return NULL;

        pmd = pmd_offset(pud, addr);
        if (sz == PMD_SIZE)
                /* must be pmd huge, non-present or none */
                return (pte_t *)pmd;

        if (!pmd_present(pmdp_get(pmd)))
                return NULL;

        for_each_napot_order(order) {
                if (napot_cont_size(order) == sz) {
                        pte = pte_offset_huge(pmd, addr & napot_cont_mask(order));
                        break;
                }
        }
        return pte;
}
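/*
 * Mask used to advance an address to the last hugepage mapped by a single
 * page-table page, so non-present entries can be skipped when linearly
 * scanning address ranges.
 */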
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
        unsigned long hp_size = huge_page_size(h);

        switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
        case PUD_SIZE:
                return P4D_SIZE - PUD_SIZE;
#endif
        case PMD_SIZE:
                return PUD_SIZE - PMD_SIZE;
        case napot_cont_size(NAPOT_CONT64KB_ORDER):
                return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER);
        default:
                break;
        }

        return 0UL;
}
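/*
 * Clear all PTEs of a NAPOT group and return the first one, with the dirty
 * and accessed bits of every cleared entry folded in. No TLB flush here.
 */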
static pte_t get_clear_contig(struct mm_struct *mm,
                              unsigned long addr,
                              pte_t *ptep,
                              unsigned long pte_num)
{
        pte_t orig_pte = ptep_get(ptep);
        unsigned long i;

        for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) {
                pte_t pte = ptep_get_and_clear(mm, addr, ptep);

                if (pte_dirty(pte))
                        orig_pte = pte_mkdirty(orig_pte);

                if (pte_young(pte))
                        orig_pte = pte_mkyoung(orig_pte);
        }

        return orig_pte;
}
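/* Same as get_clear_contig(), but also flush the TLB range if the entry was valid. */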
static pte_t get_clear_contig_flush(struct mm_struct *mm,
                                    unsigned long addr,
                                    pte_t *ptep,
                                    unsigned long pte_num)
{
        pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num);
        struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
        bool valid = !pte_none(orig_pte);

        if (valid)
                flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num));

        return orig_pte;
}
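/*
 * Turn a regular PTE into a hugepage PTE: mark it NAPOT for the matching
 * contiguous order, or fall back to a classic PMD/PUD huge PTE if no NAPOT
 * order matches the shift.
 */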
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
        unsigned long order;

        for_each_napot_order(order) {
                if (shift == napot_cont_shift(order)) {
                        entry = pte_mknapot(entry, order);
                        break;
                }
        }
        if (order == NAPOT_ORDER_MAX)
                entry = pte_mkhuge(entry);

        return entry;
}
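/* Clear @ncontig entries starting at @addr and flush the covered TLB range. */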
static void clear_flush(struct mm_struct *mm,
                        unsigned long addr,
                        pte_t *ptep,
                        unsigned long pgsize,
                        unsigned long ncontig)
{
        struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
        unsigned long i, saddr = addr;

        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
                ptep_get_and_clear(mm, addr, ptep);

        flush_tlb_range(&vma, saddr, addr);
}
/*
 * When dealing with NAPOT mappings, the privileged specification indicates that
 * "if an update needs to be made, the OS generally should first mark all of the
 * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions
 * within the range, [...] then update the PTE(s), as described in Section
 * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by
 * arm64.
 */
void set_huge_pte_at(struct mm_struct *mm,
                     unsigned long addr,
                     pte_t *ptep,
                     pte_t pte,
                     unsigned long sz)
{
        unsigned long hugepage_shift, pgsize;
        int i, pte_num;

        if (sz >= PGDIR_SIZE)
                hugepage_shift = PGDIR_SHIFT;
        else if (sz >= P4D_SIZE)
                hugepage_shift = P4D_SHIFT;
        else if (sz >= PUD_SIZE)
                hugepage_shift = PUD_SHIFT;
        else if (sz >= PMD_SIZE)
                hugepage_shift = PMD_SHIFT;
        else
                hugepage_shift = PAGE_SHIFT;

        pte_num = sz >> hugepage_shift;
        pgsize = 1 << hugepage_shift;

        if (!pte_present(pte)) {
                for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
                        set_ptes(mm, addr, ptep, pte, 1);
                return;
        }

        if (!pte_napot(pte)) {
                set_ptes(mm, addr, ptep, pte, 1);
                return;
        }

        clear_flush(mm, addr, ptep, pgsize, pte_num);

        for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
                set_pte_at(mm, addr, ptep, pte);
}
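/*
 * Update the access flags of a hugepage mapping. NAPOT groups are cleared
 * and flushed first, then rewritten, following the break-before-make rule
 * described above.
 */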
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
                               unsigned long addr,
                               pte_t *ptep,
                               pte_t pte,
                               int dirty)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long order;
        pte_t orig_pte;
        int i, pte_num;

        if (!pte_napot(pte))
                return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

        order = napot_cont_order(pte);
        pte_num = napot_pte_num(order);
        ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
        orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

        if (pte_dirty(orig_pte))
                pte = pte_mkdirty(pte);

        if (pte_young(orig_pte))
                pte = pte_mkyoung(pte);

        for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
                set_pte_at(mm, addr, ptep, pte);

        return true;
}
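/* Clear a hugepage mapping and return its old value; no TLB flush here. */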
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                              unsigned long addr,
                              pte_t *ptep)
{
        pte_t orig_pte = ptep_get(ptep);
        int pte_num;

        if (!pte_napot(orig_pte))
                return ptep_get_and_clear(mm, addr, ptep);

        pte_num = napot_pte_num(napot_cont_order(orig_pte));

        return get_clear_contig(mm, addr, ptep, pte_num);
}
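/*
 * Write-protect a hugepage mapping. NAPOT groups are cleared and flushed
 * first, then rewritten with the write permission removed.
 */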
void huge_ptep_set_wrprotect(struct mm_struct *mm,
                             unsigned long addr,
                             pte_t *ptep)
{
        pte_t pte = ptep_get(ptep);
        unsigned long order;
        pte_t orig_pte;
        int i, pte_num;

        if (!pte_napot(pte)) {
                ptep_set_wrprotect(mm, addr, ptep);
                return;
        }

        order = napot_cont_order(pte);
        pte_num = napot_pte_num(order);
        ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
        orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

        orig_pte = pte_wrprotect(orig_pte);

        for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
                set_pte_at(mm, addr, ptep, orig_pte);
}
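/* Clear a hugepage mapping and flush the TLB for the covered range. */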
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
                            unsigned long addr,
                            pte_t *ptep)
{
        pte_t pte = ptep_get(ptep);
        int pte_num;

        if (!pte_napot(pte))
                return ptep_clear_flush(vma, addr, ptep);

        pte_num = napot_pte_num(napot_cont_order(pte));

        return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}
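/* Clear a hugepage mapping without collecting dirty/accessed state or flushing. */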
void huge_pte_clear(struct mm_struct *mm,
                    unsigned long addr,
                    pte_t *ptep,
                    unsigned long sz)
{
        pte_t pte = ptep_get(ptep);
        int i, pte_num;

        if (!pte_napot(pte)) {
                pte_clear(mm, addr, ptep);
                return;
        }

        pte_num = napot_pte_num(napot_cont_order(pte));
        for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
                pte_clear(mm, addr, ptep);
}
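/* True if @size matches one of the NAPOT contiguous sizes and Svnapot is available. */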
static bool is_napot_size(unsigned long size)
{
        unsigned long order;

        if (!has_svnapot())
                return false;

        for_each_napot_order(order) {
                if (size == napot_cont_size(order))
                        return true;
        }
        return false;
}
static __init int napot_hugetlbpages_init(void)
{
        if (has_svnapot()) {
                unsigned long order;

                for_each_napot_order(order)
                        hugetlb_add_hstate(order);
        }
        return 0;
}
arch_initcall(napot_hugetlbpages_init);

#else

static bool is_napot_size(unsigned long size)
{
        return false;
}

#endif /*CONFIG_RISCV_ISA_SVNAPOT*/
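/*
 * A size is valid if it is the default hugepage size (HPAGE_SIZE), a
 * PUD-sized gigantic page on 64-bit, or one of the NAPOT sizes.
 */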
static bool __hugetlb_valid_size(unsigned long size)
{
        if (size == HPAGE_SIZE)
                return true;
        else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
                return true;
        else if (is_napot_size(size))
                return true;
        else
                return false;
}

bool __init arch_hugetlb_valid_size(unsigned long size)
{
        return __hugetlb_valid_size(size);
}

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
        return __hugetlb_valid_size(huge_page_size(h));
}
#endif
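/* Register PUD-sized gigantic pages when runtime contiguous allocation is available. */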
#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
        /* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
        if (IS_ENABLED(CONFIG_64BIT))
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        return 0;
}
arch_initcall(gigantic_pages_init);
#endif