mm: new follow_pfnmap API
Introduce a pair of APIs to follow pfn mappings to get entry information. It is very similar to what follow_pte() does, but different in that it recognizes huge pfn mappings.

Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Niklas Schnelle <schnelle@linux.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 0515e022e1
commit 6da8e9634b
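For reference, a minimal usage sketch of the new API pair is shown below. It assumes a hypothetical caller (lookup_pfnmap_pfn() is illustrative only and not part of this commit) that already holds the mmap read lock across the start()/end() pair, as the kernel-doc in the diff requires:

/* Illustrative only: a hypothetical caller of the new API. */
static int lookup_pfnmap_pfn(struct vm_area_struct *vma, unsigned long addr,
                             unsigned long *pfn, bool *writable)
{
        struct follow_pfnmap_args args = {
                .vma = vma,             /* input: VMA covering @addr */
                .address = addr,        /* input: virtual address to walk */
        };
        int ret;

        /* mmap read lock (or i_mmap_rwsem) must be held across start()/end() */
        ret = follow_pfnmap_start(&args);
        if (ret)
                return ret;

        /* Output fields are only stable until follow_pfnmap_end() is called */
        *pfn = args.pfn;
        *writable = args.writable;

        follow_pfnmap_end(&args);
        return 0;
}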
@@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
                        void *buf, int len, int write);
 
+struct follow_pfnmap_args {
+       /**
+        * Inputs:
+        * @vma: Pointer to @vm_area_struct struct
+        * @address: the virtual address to walk
+        */
+       struct vm_area_struct *vma;
+       unsigned long address;
+       /**
+        * Internals:
+        *
+        * The caller shouldn't touch any of these.
+        */
+       spinlock_t *lock;
+       pte_t *ptep;
+       /**
+        * Outputs:
+        *
+        * @pfn: the PFN of the address
+        * @pgprot: the pgprot_t of the mapping
+        * @writable: whether the mapping is writable
+        * @special: whether the mapping is a special mapping (real PFN maps)
+        */
+       unsigned long pfn;
+       pgprot_t pgprot;
+       bool writable;
+       bool special;
+};
+int follow_pfnmap_start(struct follow_pfnmap_args *args);
+void follow_pfnmap_end(struct follow_pfnmap_args *args);
+
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
mm/memory.c (+150 lines)
@@ -6172,6 +6172,156 @@ out:
 }
 EXPORT_SYMBOL_GPL(follow_pte);
 
+static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
+                                    spinlock_t *lock, pte_t *ptep,
+                                    pgprot_t pgprot, unsigned long pfn_base,
+                                    unsigned long addr_mask, bool writable,
+                                    bool special)
+{
+       args->lock = lock;
+       args->ptep = ptep;
+       args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
+       args->pgprot = pgprot;
+       args->writable = writable;
+       args->special = special;
+}
+
+static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
+{
+#ifdef CONFIG_LOCKDEP
+       struct address_space *mapping = vma->vm_file->f_mapping;
+
+       if (mapping)
+               lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
+                              lockdep_is_held(&vma->vm_mm->mmap_lock));
+       else
+               lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
+#endif
+}
+
+/**
+ * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
+ * @args: Pointer to struct @follow_pfnmap_args
+ *
+ * The caller needs to setup args->vma and args->address to point to the
+ * virtual address as the target of such lookup. On a successful return,
+ * the results will be put into other output fields.
+ *
+ * After the caller finished using the fields, the caller must invoke
+ * another follow_pfnmap_end() to proper releases the locks and resources
+ * of such look up request.
+ *
+ * During the start() and end() calls, the results in @args will be valid
+ * as proper locks will be held. After the end() is called, all the fields
+ * in @follow_pfnmap_args will be invalid to be further accessed. Further
+ * use of such information after end() may require proper synchronizations
+ * by the caller with page table updates, otherwise it can create a
+ * security bug.
+ *
+ * If the PTE maps a refcounted page, callers are responsible to protect
+ * against invalidation with MMU notifiers; otherwise access to the PFN at
+ * a later point in time can trigger use-after-free.
+ *
+ * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
+ * should be taken for read, and the mmap semaphore cannot be released
+ * before the end() is invoked.
+ *
+ * This function must not be used to modify PTE content.
+ *
+ * Return: zero on success, negative otherwise.
+ */
+int follow_pfnmap_start(struct follow_pfnmap_args *args)
+{
+       struct vm_area_struct *vma = args->vma;
+       unsigned long address = args->address;
+       struct mm_struct *mm = vma->vm_mm;
+       spinlock_t *lock;
+       pgd_t *pgdp;
+       p4d_t *p4dp, p4d;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep, pte;
+
+       pfnmap_lockdep_assert(vma);
+
+       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+               goto out;
+
+       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+               goto out;
+retry:
+       pgdp = pgd_offset(mm, address);
+       if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
+               goto out;
+
+       p4dp = p4d_offset(pgdp, address);
+       p4d = READ_ONCE(*p4dp);
+       if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
+               goto out;
+
+       pudp = pud_offset(p4dp, address);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
+               goto out;
+       if (pud_leaf(pud)) {
+               lock = pud_lock(mm, pudp);
+               if (!unlikely(pud_leaf(pud))) {
+                       spin_unlock(lock);
+                       goto retry;
+               }
+               pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
+                                 pud_pfn(pud), PUD_MASK, pud_write(pud),
+                                 pud_special(pud));
+               return 0;
+       }
+
+       pmdp = pmd_offset(pudp, address);
+       pmd = pmdp_get_lockless(pmdp);
+       if (pmd_leaf(pmd)) {
+               lock = pmd_lock(mm, pmdp);
+               if (!unlikely(pmd_leaf(pmd))) {
+                       spin_unlock(lock);
+                       goto retry;
+               }
+               pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
+                                 pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
+                                 pmd_special(pmd));
+               return 0;
+       }
+
+       ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
+       if (!ptep)
+               goto out;
+       pte = ptep_get(ptep);
+       if (!pte_present(pte))
+               goto unlock;
+       pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
+                         pte_pfn(pte), PAGE_MASK, pte_write(pte),
+                         pte_special(pte));
+       return 0;
+unlock:
+       pte_unmap_unlock(ptep, lock);
+out:
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(follow_pfnmap_start);
+
+/**
+ * follow_pfnmap_end(): End a follow_pfnmap_start() process
+ * @args: Pointer to struct @follow_pfnmap_args
+ *
+ * Must be used in pair of follow_pfnmap_start(). See the start() function
+ * above for more information.
+ */
+void follow_pfnmap_end(struct follow_pfnmap_args *args)
+{
+       if (args->lock)
+               spin_unlock(args->lock);
+       if (args->ptep)
+               pte_unmap(args->ptep);
+}
+EXPORT_SYMBOL_GPL(follow_pfnmap_end);
+
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 /**
  * generic_access_phys - generic implementation for iomem mmap access