25d4054cc9
Patch series "mm: Care about shadow stack guard gap when getting an unmapped area", v2. As covered in the commit log for commit c44357c2e7
("x86/mm: care about shadow stack guard gap during placement") our current mmap() implementation does not take care to ensure that a new mapping isn't placed with existing mappings inside its own guard gaps. This is particularly important for shadow stacks since if two shadow stacks end up getting placed adjacent to each other then they can overflow into each other which weakens the protection offered by the feature. On x86 there is a custom arch_get_unmapped_area() which was updated by the above commit to cover this case by specifying a start_gap for allocations with VM_SHADOW_STACK. Both arm64 and RISC-V have equivalent features and use the generic implementation of arch_get_unmapped_area() so let's make the equivalent change there so they also don't get shadow stack pages placed without guard pages. The arm64 and RISC-V shadow stack implementations are currently on the list: https://lore.kernel.org/r/20240829-arm64-gcs-v12-0-42fec94743 https://lore.kernel.org/lkml/20240403234054.2020347-1-debug@rivosinc.com/ Given the addition of the use of vm_flags in the generic implementation we also simplify the set of possibilities that have to be dealt with in the core code by making arch_get_unmapped_area() take vm_flags as standard. This is a bit invasive since the prototype change touches quite a few architectures but since the parameter is ignored the change is straightforward, the simplification for the generic code seems worth it. This patch (of 3): When we introduced arch_get_unmapped_area_vmflags() in commit 961148704a
("mm: introduce arch_get_unmapped_area_vmflags()") we did so as part of properly supporting guard pages for shadow stacks on x86_64, which uses a custom arch_get_unmapped_area(). Equivalent features are also present on both arm64 and RISC-V, both of which use the generic implementation of arch_get_unmapped_area() and will require equivalent modification there. Rather than continue to deal with having two versions of the functions let's bite the bullet and have all implementations of arch_get_unmapped_area() take vm_flags as a parameter. The new parameter is currently ignored by all implementations other than x86. The only caller that doesn't have a vm_flags available is mm_get_unmapped_area(), as for the x86 implementation and the wrapper used on other architectures this is modified to supply no flags. No functional changes. Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-0-a46b8b6dc0ed@kernel.org Link: https://lkml.kernel.org/r/20240904-mm-generic-shadow-stack-guard-v2-1-a46b8b6dc0ed@kernel.org Signed-off-by: Mark Brown <broonie@kernel.org> Acked-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Acked-by: Helge Deller <deller@gmx.de> [parisc] Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Chris Zankel <chris@zankel.net> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David S. Miller <davem@davemloft.net> Cc: "Edgecombe, Rick P" <rick.p.edgecombe@intel.com> Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Cc: Guo Ren <guoren@kernel.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: "H. 
Peter Anvin" <hpa@zytor.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Naveen N Rao <naveen@kernel.org> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Richard Henderson <richard.henderson@linaro.org> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vineet Gupta <vgupta@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
183 lines
4.0 KiB
C
183 lines
4.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* PARISC64 Huge TLB page support.
|
|
*
|
|
* This parisc implementation is heavily based on the SPARC and x86 code.
|
|
*
|
|
* Copyright (C) 2015 Helge Deller <deller@gmx.de>
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/sysctl.h>
|
|
|
|
#include <asm/mman.h>
|
|
#include <asm/tlb.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/mmu_context.h>
|
|
|
|
|
|
unsigned long
|
|
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags)
|
|
{
|
|
struct hstate *h = hstate_file(file);
|
|
|
|
if (len & ~huge_page_mask(h))
|
|
return -EINVAL;
|
|
if (len > TASK_SIZE)
|
|
return -ENOMEM;
|
|
|
|
if (flags & MAP_FIXED)
|
|
if (prepare_hugepage_range(file, addr, len))
|
|
return -EINVAL;
|
|
|
|
if (addr)
|
|
addr = ALIGN(addr, huge_page_size(h));
|
|
|
|
/* we need to make sure the colouring is OK */
|
|
return arch_get_unmapped_area(file, addr, len, pgoff, flags, 0);
|
|
}
|
|
|
|
|
|
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
unsigned long addr, unsigned long sz)
|
|
{
|
|
pgd_t *pgd;
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte = NULL;
|
|
|
|
/* We must align the address, because our caller will run
|
|
* set_huge_pte_at() on whatever we return, which writes out
|
|
* all of the sub-ptes for the hugepage range. So we have
|
|
* to give it the first such sub-pte.
|
|
*/
|
|
addr &= HPAGE_MASK;
|
|
|
|
pgd = pgd_offset(mm, addr);
|
|
p4d = p4d_offset(pgd, addr);
|
|
pud = pud_alloc(mm, p4d, addr);
|
|
if (pud) {
|
|
pmd = pmd_alloc(mm, pud, addr);
|
|
if (pmd)
|
|
pte = pte_alloc_huge(mm, pmd, addr);
|
|
}
|
|
return pte;
|
|
}
|
|
|
|
pte_t *huge_pte_offset(struct mm_struct *mm,
|
|
unsigned long addr, unsigned long sz)
|
|
{
|
|
pgd_t *pgd;
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte = NULL;
|
|
|
|
addr &= HPAGE_MASK;
|
|
|
|
pgd = pgd_offset(mm, addr);
|
|
if (!pgd_none(*pgd)) {
|
|
p4d = p4d_offset(pgd, addr);
|
|
if (!p4d_none(*p4d)) {
|
|
pud = pud_offset(p4d, addr);
|
|
if (!pud_none(*pud)) {
|
|
pmd = pmd_offset(pud, addr);
|
|
if (!pmd_none(*pmd))
|
|
pte = pte_offset_huge(pmd, addr);
|
|
}
|
|
}
|
|
}
|
|
return pte;
|
|
}
|
|
|
|
/* Purge data and instruction TLB entries. Must be called holding
|
|
* the pa_tlb_lock. The TLB purge instructions are slow on SMP
|
|
* machines since the purge must be broadcast to all CPUs.
|
|
*/
|
|
static inline void purge_tlb_entries_huge(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
int i;
|
|
|
|
/* We may use multiple physical huge pages (e.g. 2x1 MB) to emulate
|
|
* Linux standard huge pages (e.g. 2 MB) */
|
|
BUILD_BUG_ON(REAL_HPAGE_SHIFT > HPAGE_SHIFT);
|
|
|
|
addr &= HPAGE_MASK;
|
|
addr |= _HUGE_PAGE_SIZE_ENCODING_DEFAULT;
|
|
|
|
for (i = 0; i < (1 << (HPAGE_SHIFT-REAL_HPAGE_SHIFT)); i++) {
|
|
purge_tlb_entries(mm, addr);
|
|
addr += (1UL << REAL_HPAGE_SHIFT);
|
|
}
|
|
}
|
|
|
|
/* __set_huge_pte_at() must be called holding the pa_tlb_lock. */
|
|
/* __set_huge_pte_at() must be called holding the pa_tlb_lock.
 *
 * Writes one normal-sized pte per base page of the huge page range,
 * bumping the physical address in the pte value by PAGE_SIZE each
 * step, then purges the stale TLB entries for the whole range.
 */
static void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t entry)
{
	unsigned long start;
	unsigned long n;

	addr &= HPAGE_MASK;
	start = addr;

	for (n = 0; n < (1UL << HUGETLB_PAGE_ORDER); n++, ptep++) {
		set_pte(ptep, entry);
		pte_val(entry) += PAGE_SIZE;
		addr += PAGE_SIZE;
	}

	purge_tlb_entries_huge(mm, start);
}
|
|
|
|
/*
 * Install a huge-page mapping at @addr; thin wrapper around
 * __set_huge_pte_at(), which does the per-sub-pte work.
 *
 * NOTE(review): @sz is accepted to satisfy the generic hugetlb
 * interface but is ignored here — presumably because parisc supports
 * a single huge page size; confirm against the arch Kconfig.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry, unsigned long sz)
{
	__set_huge_pte_at(mm, addr, ptep, entry);
}
|
|
|
|
|
|
/*
 * Atomically-from-the-caller's-view fetch the current huge pte and
 * clear the mapping (by writing a zero pte across the whole range).
 * Returns the previous pte value.
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	pte_t old_pte = *ptep;

	__set_huge_pte_at(mm, addr, ptep, __pte(0));

	return old_pte;
}
|
|
|
|
|
|
/*
 * Write-protect the huge page at @addr by rewriting the entire pte
 * range with the write bit cleared.
 */
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	/* *ptep is read before __set_huge_pte_at() rewrites the range. */
	__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(*ptep));
}
|
|
|
|
/*
 * Update access/dirty flags on a huge pte.  Only rewrites the range
 * when the new pte actually differs from the current one; returns
 * non-zero if a change was made so the caller can flush.
 */
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int changed = !pte_same(*ptep, pte);

	if (changed)
		__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);

	return changed;
}
|