mm: move vma_shrink(), vma_expand() to internal header
The vma_shrink() and vma_expand() functions are internal VMA manipulation functions which we ought to abstract away from use outside of memory management code.

To achieve this, we replace shift_arg_pages() in fs/exec.c with an invocation of a new relocate_vma_down() function implemented in mm/mmap.c, which enables us to also move move_page_tables() and vma_iter_prev_range() to internal.h.

The purpose of doing this is to isolate key VMA manipulation functions so that we can both abstract them and later render them easily testable.

Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Gow <davidgow@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Kees Cook <kees@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Rae Moar <rmoar@google.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
fa04c08f3c
commit
d61f0d5968
81
fs/exec.c
81
fs/exec.c
@ -711,80 +711,6 @@ static int copy_strings_kernel(int argc, const char *const *argv,
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
/*
|
||||
* During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
|
||||
* the binfmt code determines where the new stack should reside, we shift it to
|
||||
* its final location. The process proceeds as follows:
|
||||
*
|
||||
* 1) Use shift to calculate the new vma endpoints.
|
||||
* 2) Extend vma to cover both the old and new ranges. This ensures the
|
||||
* arguments passed to subsequent functions are consistent.
|
||||
* 3) Move vma's page tables to the new range.
|
||||
* 4) Free up any cleared pgd range.
|
||||
* 5) Shrink the vma to cover only the new range.
|
||||
*/
|
||||
static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
unsigned long old_start = vma->vm_start;
|
||||
unsigned long old_end = vma->vm_end;
|
||||
unsigned long length = old_end - old_start;
|
||||
unsigned long new_start = old_start - shift;
|
||||
unsigned long new_end = old_end - shift;
|
||||
VMA_ITERATOR(vmi, mm, new_start);
|
||||
struct vm_area_struct *next;
|
||||
struct mmu_gather tlb;
|
||||
|
||||
BUG_ON(new_start > new_end);
|
||||
|
||||
/*
|
||||
* ensure there are no vmas between where we want to go
|
||||
* and where we are
|
||||
*/
|
||||
if (vma != vma_next(&vmi))
|
||||
return -EFAULT;
|
||||
|
||||
vma_iter_prev_range(&vmi);
|
||||
/*
|
||||
* cover the whole range: [new_start, old_end)
|
||||
*/
|
||||
if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* move the page tables downwards, on failure we rely on
|
||||
* process cleanup to remove whatever mess we made.
|
||||
*/
|
||||
if (length != move_page_tables(vma, old_start,
|
||||
vma, new_start, length, false, true))
|
||||
return -ENOMEM;
|
||||
|
||||
lru_add_drain();
|
||||
tlb_gather_mmu(&tlb, mm);
|
||||
next = vma_next(&vmi);
|
||||
if (new_end > old_start) {
|
||||
/*
|
||||
* when the old and new regions overlap clear from new_end.
|
||||
*/
|
||||
free_pgd_range(&tlb, new_end, old_end, new_end,
|
||||
next ? next->vm_start : USER_PGTABLES_CEILING);
|
||||
} else {
|
||||
/*
|
||||
* otherwise, clean from old_start; this is done to not touch
|
||||
* the address space in [new_end, old_start) some architectures
|
||||
* have constraints on va-space that make this illegal (IA64) -
|
||||
* for the others its just a little faster.
|
||||
*/
|
||||
free_pgd_range(&tlb, old_start, old_end, new_end,
|
||||
next ? next->vm_start : USER_PGTABLES_CEILING);
|
||||
}
|
||||
tlb_finish_mmu(&tlb);
|
||||
|
||||
vma_prev(&vmi);
|
||||
/* Shrink the vma to just the new range */
|
||||
return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Finalizes the stack vm_area_struct. The flags and permissions are updated,
|
||||
* the stack is optionally relocated, and some extra space is added.
|
||||
@ -877,7 +803,12 @@ int setup_arg_pages(struct linux_binprm *bprm,
|
||||
|
||||
/* Move stack pages down in memory. */
|
||||
if (stack_shift) {
|
||||
ret = shift_arg_pages(vma, stack_shift);
|
||||
/*
|
||||
* During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
|
||||
* the binfmt code determines where the new stack should reside, we shift it to
|
||||
* its final location.
|
||||
*/
|
||||
ret = relocate_vma_down(vma, stack_shift);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@ -1005,12 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
|
||||
return mas_prev(&vmi->mas, 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
|
||||
{
|
||||
return mas_prev_range(&vmi->mas, 0);
|
||||
}
|
||||
|
||||
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
|
||||
{
|
||||
return vmi->mas.index;
|
||||
@ -2520,11 +2514,6 @@ int set_page_dirty_lock(struct page *page);
|
||||
|
||||
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
|
||||
|
||||
extern unsigned long move_page_tables(struct vm_area_struct *vma,
|
||||
unsigned long old_addr, struct vm_area_struct *new_vma,
|
||||
unsigned long new_addr, unsigned long len,
|
||||
bool need_rmap_locks, bool for_stack);
|
||||
|
||||
/*
|
||||
* Flags used by change_protection(). For now we make it a bitmap so
|
||||
* that we can pass in multiple flags just like parameters. However
|
||||
@ -3267,11 +3256,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
|
||||
|
||||
/* mmap.c */
|
||||
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
|
||||
extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end, pgoff_t pgoff,
|
||||
struct vm_area_struct *next);
|
||||
extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end, pgoff_t pgoff);
|
||||
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
|
||||
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
|
||||
extern void unlink_file_vma(struct vm_area_struct *);
|
||||
@ -3279,6 +3263,7 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
|
||||
unsigned long addr, unsigned long len, pgoff_t pgoff,
|
||||
bool *need_rmap_locks);
|
||||
extern void exit_mmap(struct mm_struct *);
|
||||
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);
|
||||
|
||||
static inline int check_data_rlimit(unsigned long rlim,
|
||||
unsigned long new,
|
||||
|
@ -1305,6 +1305,12 @@ static inline struct vm_area_struct
|
||||
vma_policy(vma), new_ctx, anon_vma_name(vma));
|
||||
}
|
||||
|
||||
int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end, pgoff_t pgoff,
|
||||
struct vm_area_struct *next);
|
||||
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end, pgoff_t pgoff);
|
||||
|
||||
enum {
|
||||
/* mark page accessed */
|
||||
FOLL_TOUCH = 1 << 16,
|
||||
@ -1528,6 +1534,12 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
|
||||
{
|
||||
return mas_prev_range(&vmi->mas, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* VMA lock generalization
|
||||
*/
|
||||
@ -1639,4 +1651,10 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
|
||||
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
|
||||
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
|
||||
|
||||
/* mremap.c */
|
||||
unsigned long move_page_tables(struct vm_area_struct *vma,
|
||||
unsigned long old_addr, struct vm_area_struct *new_vma,
|
||||
unsigned long new_addr, unsigned long len,
|
||||
bool need_rmap_locks, bool for_stack);
|
||||
|
||||
#endif /* __MM_INTERNAL_H */
|
||||
|
81
mm/mmap.c
81
mm/mmap.c
@ -4088,3 +4088,84 @@ static int __meminit init_reserve_notifier(void)
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(init_reserve_notifier);
|
||||
|
||||
/*
|
||||
* Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
|
||||
* this VMA and its relocated range, which will now reside at [vma->vm_start -
|
||||
* shift, vma->vm_end - shift).
|
||||
*
|
||||
* This function is almost certainly NOT what you want for anything other than
|
||||
* early executable temporary stack relocation.
|
||||
*/
|
||||
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
|
||||
{
|
||||
/*
|
||||
* The process proceeds as follows:
|
||||
*
|
||||
* 1) Use shift to calculate the new vma endpoints.
|
||||
* 2) Extend vma to cover both the old and new ranges. This ensures the
|
||||
* arguments passed to subsequent functions are consistent.
|
||||
* 3) Move vma's page tables to the new range.
|
||||
* 4) Free up any cleared pgd range.
|
||||
* 5) Shrink the vma to cover only the new range.
|
||||
*/
|
||||
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
unsigned long old_start = vma->vm_start;
|
||||
unsigned long old_end = vma->vm_end;
|
||||
unsigned long length = old_end - old_start;
|
||||
unsigned long new_start = old_start - shift;
|
||||
unsigned long new_end = old_end - shift;
|
||||
VMA_ITERATOR(vmi, mm, new_start);
|
||||
struct vm_area_struct *next;
|
||||
struct mmu_gather tlb;
|
||||
|
||||
BUG_ON(new_start > new_end);
|
||||
|
||||
/*
|
||||
* ensure there are no vmas between where we want to go
|
||||
* and where we are
|
||||
*/
|
||||
if (vma != vma_next(&vmi))
|
||||
return -EFAULT;
|
||||
|
||||
vma_iter_prev_range(&vmi);
|
||||
/*
|
||||
* cover the whole range: [new_start, old_end)
|
||||
*/
|
||||
if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* move the page tables downwards, on failure we rely on
|
||||
* process cleanup to remove whatever mess we made.
|
||||
*/
|
||||
if (length != move_page_tables(vma, old_start,
|
||||
vma, new_start, length, false, true))
|
||||
return -ENOMEM;
|
||||
|
||||
lru_add_drain();
|
||||
tlb_gather_mmu(&tlb, mm);
|
||||
next = vma_next(&vmi);
|
||||
if (new_end > old_start) {
|
||||
/*
|
||||
* when the old and new regions overlap clear from new_end.
|
||||
*/
|
||||
free_pgd_range(&tlb, new_end, old_end, new_end,
|
||||
next ? next->vm_start : USER_PGTABLES_CEILING);
|
||||
} else {
|
||||
/*
|
||||
* otherwise, clean from old_start; this is done to not touch
|
||||
* the address space in [new_end, old_start) some architectures
|
||||
* have constraints on va-space that make this illegal (IA64) -
|
||||
* for the others its just a little faster.
|
||||
*/
|
||||
free_pgd_range(&tlb, old_start, old_end, new_end,
|
||||
next ? next->vm_start : USER_PGTABLES_CEILING);
|
||||
}
|
||||
tlb_finish_mmu(&tlb);
|
||||
|
||||
vma_prev(&vmi);
|
||||
/* Shrink the vma to just the new range */
|
||||
return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user