2024-07-29 04:50:38 -07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/*
|
|
|
|
* vma.h
|
|
|
|
*
|
|
|
|
* Core VMA manipulation API implemented in vma.c.
|
|
|
|
*/
|
|
|
|
#ifndef __MM_VMA_H
|
|
|
|
#define __MM_VMA_H
|
|
|
|
|
|
|
|
/*
 * VMA lock generalization
 *
 * Bundles the objects touched by a single VMA modification: the VMA itself,
 * an adjacent VMA, the backing file / address_space / anon_vma, and the VMAs
 * to be inserted or removed as part of the operation.
 *
 * NOTE(review): the per-member roles above are inferred from the member
 * names; this header does not show how they are filled in -- confirm
 * against the implementation in vma.c.
 */
struct vma_prepare {
	struct vm_area_struct *vma;
	struct vm_area_struct *adj_next;
	struct file *file;
	struct address_space *mapping;
	struct anon_vma *anon_vma;
	struct vm_area_struct *insert;
	struct vm_area_struct *remove;
	struct vm_area_struct *remove2;
};
|
|
|
|
|
|
|
|
/*
 * Fixed-capacity batch of up to 8 VMA pointers plus a counter.
 *
 * NOTE(review): presumably @count is the number of slots of @vmas currently
 * in use and the batch is consumed when unlinking file-backed VMAs -- the
 * users are not visible in this header; confirm in vma.c.
 */
struct unlink_vma_file_batch {
	int count;
	struct vm_area_struct *vmas[8];
};
|
|
|
|
|
2024-08-29 21:00:45 -07:00
|
|
|
/*
 * vma munmap operation
 *
 * Layout note: vma_count and the bool flags are deliberately grouped ahead of
 * the unsigned long counters.  Keeping the small members together fills what
 * would otherwise be a 4 byte hole after vma_count and lets the struct fit in
 * two 64-byte cachelines instead of three (as reported by pahole on amd64).
 * Keep this in mind when adding or reordering members.
 */
struct vma_munmap_struct {
	struct vma_iterator *vmi;
	struct vm_area_struct *vma;	/* The first vma to munmap */
	struct vm_area_struct *prev;	/* vma before the munmap area */
	struct vm_area_struct *next;	/* vma after the munmap area */
	struct list_head *uf;		/* Userfaultfd list_head */
	unsigned long start;		/* Aligned start addr (inclusive) */
	unsigned long end;		/* Aligned end addr (exclusive) */
	unsigned long unmap_start;	/* Unmap PTE start */
	unsigned long unmap_end;	/* Unmap PTE end */
	int vma_count;			/* Number of vmas that will be removed */
	bool unlock;			/* Unlock after the munmap */
	bool clear_ptes;		/* If there are outstanding PTE to be cleared */
	/* 2 byte hole */
	unsigned long nr_pages;		/* Number of pages being removed */
	unsigned long locked_vm;	/* Number of locked pages */
	unsigned long nr_accounted;	/* Number of VM_ACCOUNT pages */
	/*
	 * NOTE(review): the three counters below carry no description in the
	 * original; presumably per-category page counts (exec / stack / data)
	 * for the range being removed -- confirm against vma.c.
	 */
	unsigned long exec_vm;
	unsigned long stack_vm;
	unsigned long data_vm;
};
|
|
|
|
|
mm: avoid using vma_merge() for new VMAs
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.
Note that this function uses vma_expand() exclusively, so adopts its
requirement that the iterator point at or before the gap. We add an
assert to this effect.
This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.
In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.
Abstract this logic and eliminate all of the open-coding, and also use the
same logic for all cases where we add new VMAs to, rather than ultimately
use vma_merge(), rather use vma_expand().
Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.
Also add the ability for the vmg to track state, and able to report
errors, allowing for us to differentiate a failed merge from an inability
to allocate memory in callers.
This makes it far easier to understand what is happening in these cases
avoiding confusion, bugs and allowing for future optimisation.
Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.
Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides where the anon_vma of the
VMA being merged may be compatible with prev and next, but prev and next's
anon_vma's may not be compatible with each other.
Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.
Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-30 11:10:18 -07:00
|
|
|
/*
 * State of a merge operation, stored in vma_merge_struct.state.
 *
 * Lets callers tell a merge that simply did not happen apart from one that
 * failed because memory could not be allocated.
 */
enum vma_merge_state {
	VMA_MERGE_START,	/* Initial state, set by VMG_STATE()/VMG_VMA_STATE() */
	VMA_MERGE_ERROR_NOMEM,	/* Allocation failure; tested by vmg_nomem() */
	VMA_MERGE_NOMERGE,	/* NOTE(review): presumably "no merge performed" */
	VMA_MERGE_SUCCESS,	/* NOTE(review): presumably "merge completed" */
};
|
|
|
|
|
2024-10-17 07:31:45 -07:00
|
|
|
/* Bit flags modifying how a merge is attempted (vma_merge_struct.merge_flags). */
enum vma_merge_flags {
	/* No special behaviour requested. */
	VMG_FLAG_DEFAULT = 0,
	/*
	 * If we can expand, simply do so. We know there is nothing to merge to
	 * the right. Does not reset state upon failure to merge. The VMA
	 * iterator is assumed to be positioned at the previous VMA, rather than
	 * at the gap.
	 */
	VMG_FLAG_JUST_EXPAND = 1 << 0,
};
|
|
|
|
|
2024-08-30 11:10:15 -07:00
|
|
|
/* Represents a VMA merge operation. */
|
|
|
|
struct vma_merge_struct {
|
|
|
|
struct mm_struct *mm;
|
|
|
|
struct vma_iterator *vmi;
|
|
|
|
pgoff_t pgoff;
|
|
|
|
struct vm_area_struct *prev;
|
|
|
|
struct vm_area_struct *next; /* Modified by vma_merge(). */
|
|
|
|
struct vm_area_struct *vma; /* Either a new VMA or the one being modified. */
|
|
|
|
unsigned long start;
|
|
|
|
unsigned long end;
|
|
|
|
unsigned long flags;
|
|
|
|
struct file *file;
|
|
|
|
struct anon_vma *anon_vma;
|
|
|
|
struct mempolicy *policy;
|
|
|
|
struct vm_userfaultfd_ctx uffd_ctx;
|
|
|
|
struct anon_vma_name *anon_name;
|
2024-10-17 07:31:45 -07:00
|
|
|
enum vma_merge_flags merge_flags;
|
mm: avoid using vma_merge() for new VMAs
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.
Note that this function uses vma_extend() exclusively, so adopts its
requirement that the iterator point at or before the gap. We add an
assert to this effect.
This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.
In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.
Abstract this logic and eliminate all of the open-coding, and also use the
same logic for all cases where we add new VMAs to, rather than ultimately
use vma_merge(), rather use vma_expand().
Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.
Also add the ability for the vmg to track state, and able to report
errors, allowing for us to differentiate a failed merge from an inability
to allocate memory in callers.
This makes it far easier to understand what is happening in these cases
avoiding confusion, bugs and allowing for future optimisation.
Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.
Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides where the anon_vma of the
VMA being merged maybe compatible with prev and next, but prev and next's
anon_vma's may not be compatible with each other.
Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.
Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-30 11:10:18 -07:00
|
|
|
enum vma_merge_state state;
|
2024-08-30 11:10:15 -07:00
|
|
|
};
|
|
|
|
|
mm: avoid using vma_merge() for new VMAs
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.
Note that this function uses vma_expand() exclusively, so adopts its
requirement that the iterator point at or before the gap. We add an
assert to this effect.
This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.
In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.
Abstract this logic and eliminate all of the open-coding, and also use the
same logic for all cases where we add new VMAs to, rather than ultimately
use vma_merge(), rather use vma_expand().
Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.
Also add the ability for the vmg to track state, and able to report
errors, allowing for us to differentiate a failed merge from an inability
to allocate memory in callers.
This makes it far easier to understand what is happening in these cases
avoiding confusion, bugs and allowing for future optimisation.
Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.
Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides where the anon_vma of the
VMA being merged may be compatible with prev and next, but prev and next's
anon_vma's may not be compatible with each other.
Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.
Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-30 11:10:18 -07:00
|
|
|
static inline bool vmg_nomem(struct vma_merge_struct *vmg)
|
|
|
|
{
|
|
|
|
return vmg->state == VMA_MERGE_ERROR_NOMEM;
|
|
|
|
}
|
|
|
|
|
2024-08-30 11:10:15 -07:00
|
|
|
/* Assumes addr >= vma->vm_start. */
|
|
|
|
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
|
|
|
|
unsigned long addr)
|
|
|
|
{
|
|
|
|
return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * VMG_STATE() - Declare and initialise a struct vma_merge_struct called @name
 * from explicit values.
 *
 * Only mm, vmi, start, end, flags and pgoff are taken from the arguments.
 * state starts at VMA_MERGE_START and merge_flags at VMG_FLAG_DEFAULT; every
 * member not listed here is zero-initialised by the designated initialiser.
 *
 * The macro expands to a declaration without a trailing semicolon, so it is
 * used as "VMG_STATE(vmg, ...);" in a declaration position.
 */
#define VMG_STATE(name, mm_, vmi_, start_, end_, flags_, pgoff_)	\
	struct vma_merge_struct name = {				\
		.mm = mm_,						\
		.vmi = vmi_,						\
		.start = start_,					\
		.end = end_,						\
		.flags = flags_,					\
		.pgoff = pgoff_,					\
		.state = VMA_MERGE_START,				\
		.merge_flags = VMG_FLAG_DEFAULT,			\
	}
|
|
|
|
|
|
|
|
/*
 * VMG_VMA_STATE() - Declare and initialise a struct vma_merge_struct called
 * @name from an existing VMA.
 *
 * mm, flags, file, anon_vma, policy, uffd_ctx and anon_name are copied from
 * @vma_; pgoff is derived from @vma_ and @start_ via vma_pgoff_offset(), so
 * @start_ must not be below vma_->vm_start.  next is explicitly NULL, state
 * starts at VMA_MERGE_START and merge_flags at VMG_FLAG_DEFAULT.
 *
 * @vma_ is parenthesised before each dereference so that an argument such as
 * "&some_vma" expands correctly.  It is still evaluated several times, so it
 * must not have side effects.
 *
 * The macro expands to a declaration without a trailing semicolon.
 */
#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
	struct vma_merge_struct name = {			\
		.mm = (vma_)->vm_mm,				\
		.vmi = vmi_,					\
		.prev = prev_,					\
		.next = NULL,					\
		.vma = vma_,					\
		.start = start_,				\
		.end = end_,					\
		.flags = (vma_)->vm_flags,			\
		.pgoff = vma_pgoff_offset(vma_, start_),	\
		.file = (vma_)->vm_file,			\
		.anon_vma = (vma_)->anon_vma,			\
		.policy = vma_policy(vma_),			\
		.uffd_ctx = (vma_)->vm_userfaultfd_ctx,		\
		.anon_name = anon_vma_name(vma_),		\
		.state = VMA_MERGE_START,			\
		.merge_flags = VMG_FLAG_DEFAULT,		\
	}
|
|
|
|
|
2024-07-29 04:50:38 -07:00
|
|
|
/*
 * validate_mm() is a real function only when CONFIG_DEBUG_VM_MAPLE_TREE is
 * set.  Otherwise it is an empty statement-like macro: the argument is not
 * evaluated and no code is generated.
 */
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm);
#else
#define validate_mm(mm) do { } while (0)
#endif
|
|
|
|
|
|
|
|
/*
 * Required for expand_downwards().
 *
 * Declaration only; the definition is not part of this header.
 */
void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma);
|
|
|
|
|
|
|
|
/*
 * Required for expand_downwards().
 *
 * Declaration only; the definition is not part of this header.
 */
void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma);
|
|
|
|
|
2024-08-30 11:10:17 -07:00
|
|
|
/*
 * Expand a VMA as described by @vmg.  Declaration only.
 * NOTE(review): the int return is presumably 0 or a negative errno --
 * confirm against the definition in vma.c.
 */
int vma_expand(struct vma_merge_struct *vmg);
|
2024-07-29 04:50:38 -07:00
|
|
|
/*
 * Shrink @vma to the range given by @start/@end with page offset @pgoff,
 * using @vmi.  Declaration only.
 * NOTE(review): the int return is presumably 0 or a negative errno --
 * confirm against the definition in vma.c.
 */
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
	       unsigned long start, unsigned long end, pgoff_t pgoff);
|
|
|
|
|
2024-08-29 21:00:55 -07:00
|
|
|
/*
 * vma_iter_store_gfp() - Store @vma in the tree behind @vmi
 * @vmi: The vma iterator
 * @vma: The VMA to store; it covers [vma->vm_start, vma->vm_end - 1]
 * @gfp: Allocation flags passed through to mas_store_gfp()
 *
 * Returns: 0 on success, -ENOMEM if the maple state reports an error after
 * the store.
 */
static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
			struct vm_area_struct *vma, gfp_t gfp)
{
	/*
	 * An iterator that has already been used and whose current range does
	 * not contain vma->vm_start is invalidated before the store.
	 */
	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	/* The range is inclusive, hence vm_end - 1. */
	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_gfp(&vmi->mas, vma, gfp);
	if (unlikely(mas_is_err(&vmi->mas)))
		return -ENOMEM;

	return 0;
}
|
|
|
|
|
2024-08-29 21:00:52 -07:00
|
|
|
#ifdef CONFIG_MMU
/*
 * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
 * @vms: The vma munmap struct
 * @vmi: The vma iterator
 * @vma: The first vm_area_struct to munmap
 * @start: The aligned start address to munmap
 * @end: The aligned end address to munmap
 * @uf: The userfaultfd list_head
 * @unlock: Unlock after the operation.  Only unlocked on success
 *
 * If @vma is NULL, @start and @end are ignored and the range is set to
 * [0, 0).  All counters are zeroed, clear_ptes starts out false and the
 * PTE unmap window defaults to [FIRST_USER_ADDRESS, USER_PGTABLES_CEILING].
 *
 * Note that vms->prev and vms->next are not written here.
 */
static inline void init_vma_munmap(struct vma_munmap_struct *vms,
		struct vma_iterator *vmi, struct vm_area_struct *vma,
		unsigned long start, unsigned long end, struct list_head *uf,
		bool unlock)
{
	vms->vmi = vmi;
	vms->vma = vma;
	if (vma) {
		vms->start = start;
		vms->end = end;
	} else {
		vms->start = vms->end = 0;
	}
	vms->unlock = unlock;
	vms->uf = uf;
	vms->vma_count = 0;
	vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
	vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
	vms->unmap_start = FIRST_USER_ADDRESS;
	vms->unmap_end = USER_PGTABLES_CEILING;
	vms->clear_ptes = false;
}
#endif
|
2024-08-29 21:00:49 -07:00
|
|
|
|
|
|
|
/*
 * Gather the VMAs covered by @vms, tracking them via @mas_detach.
 * Declaration only.
 * NOTE(review): the description is inferred from the name and from how
 * @mas_detach is consumed by reattach_vmas(); the int return is presumably
 * 0 or a negative errno -- confirm against vma.c.
 */
int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach);
|
|
|
|
|
|
|
|
/*
 * Complete the munmap operation described by @vms using the detached VMAs
 * tracked in @mas_detach.  Declaration only; also called from
 * vms_abort_munmap_vmas() when an abort can no longer reattach the VMAs.
 */
void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach);
|
|
|
|
|
2024-08-29 21:00:54 -07:00
|
|
|
/*
 * Declaration only.
 * NOTE(review): presumably cleans up the area covered by @vms ahead of
 * completing the munmap -- the exact work done is not visible from this
 * header; confirm against vma.c.
 */
void vms_clean_up_area(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach);
|
2024-08-29 21:00:54 -07:00
|
|
|
|
2024-08-29 21:00:51 -07:00
|
|
|
/*
|
2024-08-29 21:00:55 -07:00
|
|
|
* reattach_vmas() - Undo any munmap work and free resources
|
|
|
|
* @mas_detach: The maple state with the detached maple tree
|
2024-08-29 21:00:51 -07:00
|
|
|
*
|
|
|
|
* Reattach any detached vmas and free up the maple tree used to track the vmas.
|
|
|
|
*/
|
2024-08-29 21:00:55 -07:00
|
|
|
static inline void reattach_vmas(struct ma_state *mas_detach)
|
2024-08-29 21:00:51 -07:00
|
|
|
{
|
|
|
|
struct vm_area_struct *vma;
|
|
|
|
|
|
|
|
mas_set(mas_detach, 0);
|
2024-08-29 21:00:55 -07:00
|
|
|
mas_for_each(mas_detach, vma, ULONG_MAX)
|
2024-08-29 21:00:51 -07:00
|
|
|
vma_mark_detached(vma, false);
|
|
|
|
|
|
|
|
__mt_destroy(mas_detach->tree);
|
|
|
|
}
|
|
|
|
|
2024-08-29 21:00:55 -07:00
|
|
|
/*
|
|
|
|
* vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
|
|
|
|
* operation.
|
|
|
|
* @vms: The vma unmap structure
|
|
|
|
* @mas_detach: The maple state with the detached maple tree
|
|
|
|
*
|
|
|
|
* Reattach any detached vmas, free up the maple tree used to track the vmas.
|
|
|
|
* If that's not possible because the ptes are cleared (and vm_ops->closed() may
|
|
|
|
* have been called), then a NULL is written over the vmas and the vmas are
|
|
|
|
* removed (munmap() completed).
|
|
|
|
*/
|
|
|
|
static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
|
|
|
|
struct ma_state *mas_detach)
|
|
|
|
{
|
|
|
|
struct ma_state *mas = &vms->vmi->mas;
|
|
|
|
if (!vms->nr_pages)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (vms->clear_ptes)
|
|
|
|
return reattach_vmas(mas_detach);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Aborting cannot just call the vm_ops open() because they are often
|
|
|
|
* not symmetrical and state data has been lost. Resort to the old
|
|
|
|
* failure method of leaving a gap where the MAP_FIXED mapping failed.
|
|
|
|
*/
|
|
|
|
mas_set_range(mas, vms->start, vms->end - 1);
|
2024-10-16 08:07:53 -07:00
|
|
|
mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
|
|
|
|
/* Clean up the insertion of the unfortunate gap */
|
|
|
|
vms_complete_munmap_vmas(vms, mas_detach);
|
2024-08-29 21:00:55 -07:00
|
|
|
}
|
|
|
|
|
2024-07-29 04:50:38 -07:00
|
|
|
/*
 * munmap the range [@start, @end) of @mm, beginning at @vma, using @vmi.
 * Declaration only.
 * NOTE(review): the "align" in the name suggests @start/@end must already be
 * aligned, and @uf/@unlock presumably match the same-named members of
 * struct vma_munmap_struct -- confirm against vma.c.
 */
int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
		    struct mm_struct *mm, unsigned long start,
		    unsigned long end, struct list_head *uf, bool unlock);
|
|
|
|
|
|
|
|
/*
 * munmap @len bytes of @mm starting at @start, using @vmi.  Declaration only.
 * NOTE(review): @uf/@unlock presumably match the same-named members of
 * struct vma_munmap_struct; the int return is presumably 0 or a negative
 * errno -- confirm against vma.c.
 */
int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
		  unsigned long start, size_t len, struct list_head *uf,
		  bool unlock);
|
|
|
|
|
2024-10-29 11:11:45 -07:00
|
|
|
/*
 * Remove @vma.  Declaration only.
 * NOTE(review): the meaning of @unreachable is not visible from this header;
 * confirm against the definition in vma.c.
 */
void remove_vma(struct vm_area_struct *vma, bool unreachable);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
2024-08-29 21:00:53 -07:00
|
|
|
/*
 * Unmap the region occupied by @vma, given its neighbours @prev and @next.
 * Declaration only.
 * NOTE(review): the description is inferred from the name and parameter
 * names -- confirm against the definition in vma.c.
 */
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
		  struct vm_area_struct *prev, struct vm_area_struct *next);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
|
|
|
/*
 * We are about to modify the VMA's flags.
 *
 * Declaration only.
 * NOTE(review): presumably returns the VMA covering [@start, @end) that the
 * caller should apply @new_flags to, or an error pointer -- confirm against
 * the definition in vma.c.
 */
struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
		struct vm_area_struct *prev, struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		unsigned long new_flags);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
|
|
|
/*
 * We are about to modify the VMA's flags and/or anon_name.
 *
 * Declaration only.
 * NOTE(review): return convention presumably matches vma_modify_flags() --
 * confirm against the definition in vma.c.
 */
struct vm_area_struct
*vma_modify_flags_name(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start,
		       unsigned long end,
		       unsigned long new_flags,
		       struct anon_vma_name *new_name);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
|
|
|
/*
 * We are about to modify the VMA's memory policy.
 *
 * Declaration only.
 * NOTE(review): return convention presumably matches vma_modify_flags() --
 * confirm against the definition in vma.c.
 */
struct vm_area_struct
*vma_modify_policy(struct vma_iterator *vmi,
		   struct vm_area_struct *prev,
		   struct vm_area_struct *vma,
		   unsigned long start, unsigned long end,
		   struct mempolicy *new_pol);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
|
|
|
/*
 * We are about to modify the VMA's flags and/or uffd context.
 *
 * Declaration only.  Note that @new_ctx is passed by value.
 * NOTE(review): return convention presumably matches vma_modify_flags() --
 * confirm against the definition in vma.c.
 */
struct vm_area_struct
*vma_modify_flags_uffd(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start, unsigned long end,
		       unsigned long new_flags,
		       struct vm_userfaultfd_ctx new_ctx);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
mm: avoid using vma_merge() for new VMAs
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.
Note that this function uses vma_expand() exclusively, so adopts its
requirement that the iterator point at or before the gap. We add an
assert to this effect.
This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.
In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.
Abstract this logic and eliminate all of the open-coding, and also use the
same logic for all cases where we add new VMAs to, rather than ultimately
use vma_merge(), rather use vma_expand().
Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.
Also add the ability for the vmg to track state, and able to report
errors, allowing for us to differentiate a failed merge from an inability
to allocate memory in callers.
This makes it far easier to understand what is happening in these cases
avoiding confusion, bugs and allowing for future optimisation.
Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.
Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides where the anon_vma of the
VMA being merged may be compatible with prev and next, but prev and next's
anon_vma's may not be compatible with each other.
Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.
Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-30 11:10:18 -07:00
|
|
|
struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
|
2024-07-29 04:50:38 -07:00
|
|
|
|
|
|
|
struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
|
|
|
|
struct vm_area_struct *vma,
|
|
|
|
unsigned long delta);
|
|
|
|
|
|
|
|
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);
|
|
|
|
|
|
|
|
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb);
|
|
|
|
|
|
|
|
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
|
|
|
|
struct vm_area_struct *vma);
|
|
|
|
|
|
|
|
void unlink_file_vma(struct vm_area_struct *vma);
|
|
|
|
|
|
|
|
void vma_link_file(struct vm_area_struct *vma);
|
|
|
|
|
|
|
|
int vma_link(struct mm_struct *mm, struct vm_area_struct *vma);
|
|
|
|
|
|
|
|
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
|
|
|
unsigned long addr, unsigned long len, pgoff_t pgoff,
|
|
|
|
bool *need_rmap_locks);
|
|
|
|
|
|
|
|
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma);
|
|
|
|
|
|
|
|
bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
|
|
|
|
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
|
|
|
|
|
|
|
|
int mm_take_all_locks(struct mm_struct *mm);
|
|
|
|
void mm_drop_all_locks(struct mm_struct *mm);
|
|
|
|
|
|
|
|
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We want to check manually if we can change individual PTEs writable
|
|
|
|
* if we can't do that automatically for all PTEs in a mapping. For
|
|
|
|
* private mappings, that's always the case when we have write
|
|
|
|
* permissions as we properly have to handle COW.
|
|
|
|
*/
|
|
|
|
if (vma->vm_flags & VM_SHARED)
|
|
|
|
return vma_wants_writenotify(vma, vma->vm_page_prot);
|
|
|
|
return !!(vma->vm_flags & VM_WRITE);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
/*
 * Build the page protection for @vm_flags via vm_get_page_prot() and combine
 * it with @oldprot through pgprot_modify(), returning the result.
 */
static inline pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	pgprot_t newprot = vm_get_page_prot(vm_flags);

	return pgprot_modify(oldprot, newprot);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
|
|
|
|
unsigned long min)
|
|
|
|
{
|
|
|
|
return mas_prev(&vmi->mas, min);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* These three helpers classify VMAs for virtual memory accounting.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Executable code area - executable, not writable, not stack
|
|
|
|
*/
|
|
|
|
static inline bool is_exec_mapping(vm_flags_t flags)
|
|
|
|
{
|
|
|
|
return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Stack area (including shadow stacks)
|
|
|
|
*
|
|
|
|
* VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
|
|
|
|
* do_mmap() forbids all other combinations.
|
|
|
|
*/
|
|
|
|
static inline bool is_stack_mapping(vm_flags_t flags)
|
|
|
|
{
|
|
|
|
return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Data area - private, writable, not stack
|
|
|
|
*/
|
|
|
|
static inline bool is_data_mapping(vm_flags_t flags)
|
|
|
|
{
|
|
|
|
return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void vma_iter_config(struct vma_iterator *vmi,
|
|
|
|
unsigned long index, unsigned long last)
|
|
|
|
{
|
|
|
|
__mas_set_range(&vmi->mas, index, last - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void vma_iter_reset(struct vma_iterator *vmi)
|
|
|
|
{
|
|
|
|
mas_reset(&vmi->mas);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline
|
|
|
|
struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
|
|
|
|
{
|
|
|
|
return mas_prev_range(&vmi->mas, min);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline
|
|
|
|
struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
|
|
|
|
{
|
|
|
|
return mas_next_range(&vmi->mas, max);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
|
|
|
|
unsigned long max, unsigned long size)
|
|
|
|
{
|
|
|
|
return mas_empty_area(&vmi->mas, min, max - 1, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
|
|
|
|
unsigned long max, unsigned long size)
|
|
|
|
{
|
|
|
|
return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* VMA Iterator functions shared between nommu and mmap
|
|
|
|
*/
|
|
|
|
static inline int vma_iter_prealloc(struct vma_iterator *vmi,
|
|
|
|
struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
return mas_preallocate(&vmi->mas, vma, GFP_KERNEL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void vma_iter_clear(struct vma_iterator *vmi)
|
|
|
|
{
|
|
|
|
mas_store_prealloc(&vmi->mas, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi)
|
|
|
|
{
|
|
|
|
return mas_walk(&vmi->mas);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Store a VMA with preallocated memory */
|
|
|
|
static inline void vma_iter_store(struct vma_iterator *vmi,
|
|
|
|
struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
|
|
|
|
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
|
|
|
|
if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
|
|
|
|
vmi->mas.index > vma->vm_start)) {
|
|
|
|
pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n",
|
|
|
|
vmi->mas.index, vma->vm_start, vma->vm_start,
|
|
|
|
vma->vm_end, vmi->mas.index, vmi->mas.last);
|
|
|
|
}
|
|
|
|
if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
|
|
|
|
vmi->mas.last < vma->vm_start)) {
|
|
|
|
pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
|
|
|
|
vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end,
|
|
|
|
vmi->mas.index, vmi->mas.last);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (vmi->mas.status != ma_start &&
|
|
|
|
((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
|
|
|
|
vma_iter_invalidate(vmi);
|
|
|
|
|
|
|
|
__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
|
|
|
|
mas_store_prealloc(&vmi->mas, vma);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
|
|
|
|
{
|
|
|
|
return vmi->mas.index;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned long vma_iter_end(struct vma_iterator *vmi)
|
|
|
|
{
|
|
|
|
return vmi->mas.last + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
|
|
|
|
unsigned long count)
|
|
|
|
{
|
|
|
|
return mas_expected_entries(&vmi->mas, count);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline
|
|
|
|
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
|
|
|
|
{
|
|
|
|
return mas_prev_range(&vmi->mas, 0);
|
|
|
|
}
|
|
|
|
|
mm: avoid using vma_merge() for new VMAs
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we
wish to see if it is possible to 'merge' surrounding VMAs into this range
rather than having to allocate a new VMA.
Note that this function uses vma_expand() exclusively, so adopts its
requirement that the iterator point at or before the gap. We add an
assert to this effect.
This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.
In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.
Abstract this logic and eliminate all of the open-coding, and also use the
same logic for all cases where we add new VMAs to, rather than ultimately
use vma_merge(), rather use vma_expand().
Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.
Also add the ability for the vmg to track state, and able to report
errors, allowing for us to differentiate a failed merge from an inability
to allocate memory in callers.
This makes it far easier to understand what is happening in these cases
avoiding confusion, bugs and allowing for future optimisation.
Also introduce vma_iter_next_rewind() to allow for retrieval of the next,
and (optionally) the prev VMA, rewinding to the start of the previous gap.
Introduce are_anon_vmas_compatible() to abstract individual VMA anon_vma
comparison for the case of merging on both sides where the anon_vma of the
VMA being merged may be compatible with prev and next, but prev and next's
anon_vma's may not be compatible with each other.
Finally also introduce can_vma_merge_left() / can_vma_merge_right() to
check adjacent VMA compatibility and that they are indeed adjacent.
Link: https://lkml.kernel.org/r/49d37c0769b6b9dc03b27fe4d059173832556392.1725040657.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-08-30 11:10:18 -07:00
|
|
|
/*
 * Fetch the next VMA, then rewind the iterator to the end of the previous
 * VMA or, when there is no previous VMA, to index 0.
 *
 * @pprev is optional; when non-NULL it receives the previous VMA (which may
 * itself be NULL). Returns the next VMA as reported by vma_next().
 */
static inline struct vm_area_struct *
vma_iter_next_rewind(struct vma_iterator *vmi, struct vm_area_struct **pprev)
{
	struct vm_area_struct *next;
	struct vm_area_struct *prev;

	/* Order matters: step forward first, then back again. */
	next = vma_next(vmi);
	prev = vma_prev(vmi);

	/*
	 * With no previous VMA we have advanced to the next VMA (skipping any
	 * gap) and then rewound to the start of the range, so we are already
	 * where we want to be.
	 *
	 * Advancing to the next range unconditionally would land us on the
	 * next VMA again, hence we only do so when there is a previous VMA
	 * to skip over.
	 */
	if (prev)
		vma_iter_next_range(vmi);

	if (pprev)
		*pprev = prev;

	return next;
}
|
|
|
|
|
2024-08-16 17:18:28 -07:00
|
|
|
#ifdef CONFIG_64BIT
|
|
|
|
|
|
|
|
static inline bool vma_is_sealed(struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
return (vma->vm_flags & VM_SEALED);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check if a vma is sealed for modification.
|
|
|
|
* return true, if modification is allowed.
|
|
|
|
*/
|
|
|
|
static inline bool can_modify_vma(struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
if (unlikely(vma_is_sealed(vma)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-08-16 17:18:32 -07:00
|
|
|
bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior);
|
|
|
|
|
2024-08-16 17:18:28 -07:00
|
|
|
#else
|
|
|
|
|
|
|
|
static inline bool can_modify_vma(struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-08-16 17:18:32 -07:00
|
|
|
static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2024-08-16 17:18:28 -07:00
|
|
|
#endif
|
|
|
|
|
2024-07-29 04:50:38 -07:00
|
|
|
#endif /* __MM_VMA_H */
|