// SPDX-License-Identifier: GPL-2.0-only
/* Page Fault Handling for ARC (TLB Miss / ProtV)
 *
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/signal.h>
#include <linux/interrupt.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/mm_types.h>

#include <asm/entry.h>
#include <asm/mmu.h>

/*
 * Handle a fault on a kernel virtual address: such addresses are needed to
 * implement vmalloc/pkmap/fixmap (refer to asm/processor.h for the System
 * Memory Map).
 *
 * It simply copies the PMD entry (pointer to 2nd level page table or hugepage)
 * from swapper pgdir to task pgdir, so the 2nd level table/page ends up shared
 * between the kernel and the faulting task.
 */
noinline static int handle_kernel_vaddr_fault(unsigned long address)
{
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 */
	pgd_t *pgd, *pgd_k;
	p4d_t *p4d, *p4d_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

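	/*
	 * Walk the task's table and the kernel (swapper) table in lockstep:
	 * bail out if the kernel entry itself is missing, and copy an entry
	 * into the task's table only if it isn't already populated, so just
	 * the missing level ends up pointing at the kernel's next-level table.
	 */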
	pgd = pgd_offset(current->active_mm, address);
	pgd_k = pgd_offset_k(address);

	if (pgd_none(*pgd_k))
		goto bad_area;
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);

	p4d = p4d_offset(pgd, address);
	p4d_k = p4d_offset(pgd_k, address);
	if (p4d_none(*p4d_k))
		goto bad_area;
	if (!p4d_present(*p4d))
		set_p4d(p4d, *p4d_k);

	pud = pud_offset(p4d, address);
	pud_k = pud_offset(p4d_k, address);
	if (pud_none(*pud_k))
		goto bad_area;
	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (pmd_none(*pmd_k))
		goto bad_area;
	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);

	/* XXX: create the TLB entry here */
	return 0;

bad_area:
	return 1;
}

void do_page_fault(unsigned long address, struct pt_regs *regs)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	int sig, si_code = SEGV_MAPERR;
	unsigned int write = 0, exec = 0, mask;
	vm_fault_t fault = VM_FAULT_SIGSEGV;	/* handle_mm_fault() output */
	unsigned int flags;			/* handle_mm_fault() input */

	/*
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (address >= VMALLOC_START && !user_mode(regs)) {
		if (unlikely(handle_kernel_vaddr_fault(address)))
			goto no_context;
		else
			return;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault.
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

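	/*
	 * Classify the access from the Exception Cause Register (ECR):
	 * a store/exclusive (ST/EX) cause code counts as a write, a ProtV
	 * exception with an instruction-fetch cause code counts as an
	 * execute; anything else is treated as a read.
	 */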
	if (regs->ecr.cause & ECR_C_PROTV_STORE)	/* ST/EX */
		write = 1;
	else if ((regs->ecr.vec == ECR_V_PROTV) &&
		 (regs->ecr.cause == ECR_C_PROTV_INST_FETCH))
		exec = 1;

	flags = FAULT_FLAG_DEFAULT;
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (write)
		flags |= FAULT_FLAG_WRITE;

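	/* Account the fault as a perf software event before handling it */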
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
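	/*
	 * lock_mm_and_find_vma() takes the mmap read lock and looks up (and,
	 * if necessary, expands) the VMA covering the faulting address; on
	 * failure it returns NULL with the lock already dropped.
	 */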
	vma = lock_mm_and_find_vma(mm, address, regs);
	if (!vma)
		goto bad_area_nosemaphore;

	/*
	 * vm_area is good, now check permissions for this memory access
	 */
	mask = VM_READ;
	if (write)
		mask = VM_WRITE;
	if (exec)
		mask = VM_EXEC;

	if (!(vma->vm_flags & mask)) {
		si_code = SEGV_ACCERR;
		goto bad_area;
	}

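	/*
	 * Let the core mm resolve the fault; with VM_FAULT_RETRY or
	 * VM_FAULT_COMPLETED it will have dropped the mmap lock itself.
	 */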
	fault = handle_mm_fault(vma, address, flags, regs);

	/* Quick path to respond to signals */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return;
	}

	/* The fault is fully completed (including releasing mmap lock) */
	if (fault & VM_FAULT_COMPLETED)
		return;

	/*
	 * Fault retry nuances, mmap_lock already relinquished by core mm
	 */
	if (unlikely(fault & VM_FAULT_RETRY)) {
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}

bad_area:
	mmap_read_unlock(mm);

bad_area_nosemaphore:
	/*
	 * Major/minor page fault accounting
	 * (in case of retry we only land here once)
	 */
	if (likely(!(fault & VM_FAULT_ERROR)))
		/* Normal return path: fault handled gracefully */
		return;

	if (!user_mode(regs))
		goto no_context;

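	/* Out of memory: hand off to the kernel's OOM handling, no signal here */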
	if (fault & VM_FAULT_OOM) {
		pagefault_out_of_memory();
		return;
	}

	if (fault & VM_FAULT_SIGBUS) {
		sig = SIGBUS;
		si_code = BUS_ADRERR;
	} else {
		sig = SIGSEGV;
	}

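	/* Record the faulting address and deliver the signal to the user task */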
	tsk->thread.fault_address = address;
	force_sig_fault(sig, si_code, (void __user *)address);
	return;

no_context:
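	/*
	 * The fault could not be resolved on behalf of a user task: attempt
	 * an exception table fixup (as used by the uaccess helpers), else
	 * this is a kernel bug - die with an Oops.
	 */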
	if (fixup_exception(regs))
		return;

	die("Oops", regs, address);
}