mm/gup: fix hugepd handling in hugetlb rework
Commita12083d721
added hugepd handling for gup-slow, reusing gup-fast functions. follow_hugepd() correctly took the vma pointer in, however didn't pass it over into the lower functions, which was overlooked. The issue is gup_fast_hugepte() uses the vma pointer to make the correct decision on whether an unshare is needed for a FOLL_PIN|FOLL_LONGTERM. Now without vma ponter it will constantly return "true" (needs an unshare) for a page cache, even though in the SHARED case it will be wrong to unshare. The other problem is, even if an unshare is needed, it now returns 0 rather than -EMLINK, which will not trigger a follow up FAULT_FLAG_UNSHARE fault. That will need to be fixed too when the unshare is wanted. gup_longterm test didn't expose this issue in the past because it didn't yet test R/O unshare in this case, another separate patch will enable that in future tests. Fix it by passing vma correctly to the bottom, rename gup_fast_hugepte() back to gup_hugepte() as it is shared between the fast/slow paths, and also allow -EMLINK to be returned properly by gup_hugepte() even though gup-fast will take it the same as zero. Link: https://lkml.kernel.org/r/20240430131303.264331-1-peterx@redhat.com Fixes:a12083d721
("mm/gup: handle hugepd for follow_page()") Signed-off-by: Peter Xu <peterx@redhat.com> Reported-by: David Hildenbrand <david@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Cc: Aneesh Kumar K.V <aneesh.kumar@kernel.org> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Muchun Song <muchun.song@linux.dev> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
67f4c91a44
commit
01d89b93e1
64
mm/gup.c
64
mm/gup.c
@ -525,9 +525,17 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
|
||||
return (__boundary - 1 < end - 1) ? __boundary : end;
|
||||
}
|
||||
|
||||
static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
||||
unsigned long end, unsigned int flags, struct page **pages,
|
||||
int *nr)
|
||||
/*
|
||||
* Returns 1 if succeeded, 0 if failed, -EMLINK if unshare needed.
|
||||
*
|
||||
* NOTE: for the same entry, gup-fast and gup-slow can return different
|
||||
* results (0 v.s. -EMLINK) depending on whether vma is available. This is
|
||||
* the expected behavior, where we simply want gup-fast to fallback to
|
||||
* gup-slow to take the vma reference first.
|
||||
*/
|
||||
static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,
|
||||
unsigned long addr, unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long pte_end;
|
||||
struct page *page;
|
||||
@ -559,9 +567,9 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
|
||||
if (!pte_write(pte) && gup_must_unshare(vma, flags, &folio->page)) {
|
||||
gup_put_folio(folio, refs, flags);
|
||||
return 0;
|
||||
return -EMLINK;
|
||||
}
|
||||
|
||||
*nr += refs;
|
||||
@ -577,19 +585,22 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
||||
* of the other folios. See writable_file_mapping_allowed() and
|
||||
* gup_fast_folio_allowed() for more information.
|
||||
*/
|
||||
static int gup_fast_hugepd(hugepd_t hugepd, unsigned long addr,
|
||||
unsigned int pdshift, unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
|
||||
unsigned long addr, unsigned int pdshift,
|
||||
unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long sz = 1UL << hugepd_shift(hugepd);
|
||||
unsigned long next;
|
||||
int ret;
|
||||
|
||||
ptep = hugepte_offset(hugepd, addr, pdshift);
|
||||
do {
|
||||
next = hugepte_addr_end(addr, end, sz);
|
||||
if (!gup_fast_hugepte(ptep, sz, addr, end, flags, pages, nr))
|
||||
return 0;
|
||||
ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);
|
||||
if (ret != 1)
|
||||
return ret;
|
||||
} while (ptep++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
@ -613,22 +624,25 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
|
||||
h = hstate_vma(vma);
|
||||
ptep = hugepte_offset(hugepd, addr, pdshift);
|
||||
ptl = huge_pte_lock(h, vma->vm_mm, ptep);
|
||||
ret = gup_fast_hugepd(hugepd, addr, pdshift, addr + PAGE_SIZE,
|
||||
flags, &page, &nr);
|
||||
ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,
|
||||
flags, &page, &nr);
|
||||
spin_unlock(ptl);
|
||||
|
||||
if (ret) {
|
||||
if (ret == 1) {
|
||||
/* GUP succeeded */
|
||||
WARN_ON_ONCE(nr != 1);
|
||||
ctx->page_mask = (1U << huge_page_order(h)) - 1;
|
||||
return page;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
/* ret can be either 0 (translates to NULL) or negative */
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
#else /* CONFIG_ARCH_HAS_HUGEPD */
|
||||
static inline int gup_fast_hugepd(hugepd_t hugepd, unsigned long addr,
|
||||
unsigned int pdshift, unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
|
||||
unsigned long addr, unsigned int pdshift,
|
||||
unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -3261,8 +3275,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
|
||||
* architecture have different format for hugetlbfs
|
||||
* pmd format and THP pmd format
|
||||
*/
|
||||
if (!gup_fast_hugepd(__hugepd(pmd_val(pmd)), addr,
|
||||
PMD_SHIFT, next, flags, pages, nr))
|
||||
if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,
|
||||
PMD_SHIFT, next, flags, pages, nr) != 1)
|
||||
return 0;
|
||||
} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
|
||||
pages, nr))
|
||||
@ -3291,8 +3305,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
|
||||
pages, nr))
|
||||
return 0;
|
||||
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
|
||||
if (!gup_fast_hugepd(__hugepd(pud_val(pud)), addr,
|
||||
PUD_SHIFT, next, flags, pages, nr))
|
||||
if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,
|
||||
PUD_SHIFT, next, flags, pages, nr) != 1)
|
||||
return 0;
|
||||
} else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
|
||||
pages, nr))
|
||||
@ -3318,8 +3332,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
|
||||
return 0;
|
||||
BUILD_BUG_ON(p4d_leaf(p4d));
|
||||
if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
|
||||
if (!gup_fast_hugepd(__hugepd(p4d_val(p4d)), addr,
|
||||
P4D_SHIFT, next, flags, pages, nr))
|
||||
if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,
|
||||
P4D_SHIFT, next, flags, pages, nr) != 1)
|
||||
return 0;
|
||||
} else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
|
||||
pages, nr))
|
||||
@ -3347,8 +3361,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
|
||||
pages, nr))
|
||||
return;
|
||||
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
|
||||
if (!gup_fast_hugepd(__hugepd(pgd_val(pgd)), addr,
|
||||
PGDIR_SHIFT, next, flags, pages, nr))
|
||||
if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,
|
||||
PGDIR_SHIFT, next, flags, pages, nr) != 1)
|
||||
return;
|
||||
} else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
|
||||
pages, nr))
|
||||
|
Loading…
Reference in New Issue
Block a user