1

x86/entry: Avoid redundant CR3 write on paranoid returns

The CR3 restore happens in:

  1. #NMI return.
  2. paranoid_exit() (i.e. #MCE, #VC, #DB and #DF return)

Contrary to the implication in commit 21e9445911 ("x86/mm: Optimize
RESTORE_CR3"), the kernel never modifies CR3 in any of these exceptions,
except for switching from user to kernel pagetables under PTI. That
means that most of the time when returning from an exception that
interrupted the kernel no CR3 restore is necessary. Writing CR3 is
expensive on some machines.

Only most of the time, because the interrupt might have come during kernel
entry before the user to kernel CR3 switch, or during exit after the kernel
to user switch. In the former case skipping the restore would be correct,
but definitely not in the latter.

So check the saved CR3 value and restore it only if it is a user CR3.

Give the macro a new name to clarify its usage, and remove a comment that
was describing the original behaviour along with the no longer needed jump
label.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240108113950.360438-1-jackmanb@google.com

[Rewrote commit message; responded to review comments]
Change-Id: I6e56978c4753fb943a7897ff101f519514fa0827
This commit is contained in:
Lai Jiangshan 2024-01-08 11:39:50 +00:00 committed by Thomas Gleixner
parent 6613476e22
commit bb99836199
2 changed files with 13 additions and 20 deletions

View File

@ -239,17 +239,19 @@ For 32-bit we have the following conventions - kernel is built with
.Ldone_\@:
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
/* Restore CR3 from a kernel context. May restore a user CR3 value. */
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* KERNEL pages can always resume with NOFLUSH as we do
* explicit flushes.
* If CR3 contained the kernel page tables at the paranoid exception
* entry, then there is nothing to restore as CR3 is not modified while
* handling the exception.
*/
bt $PTI_USER_PGTABLE_BIT, \save_reg
jnc .Lnoflush_\@
jnc .Lend_\@
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* Check if there's a pending flush for the user ASID we're
@ -257,20 +259,12 @@ For 32-bit we have the following conventions - kernel is built with
*/
movq \save_reg, \scratch_reg
andq $(0x7FF), \scratch_reg
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
jmp .Lwrcr3_\@
jc .Lwrcr3_\@
.Lnoflush_\@:
SET_NOFLUSH_BIT \save_reg
.Lwrcr3_\@:
/*
* The CR3 write could be avoided when not changing its value,
* but would require a CR3 read *and* a scratch register.
*/
movq \save_reg, %cr3
.Lend_\@:
.endm
@ -285,7 +279,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
.endm
#endif

View File

@ -968,14 +968,14 @@ SYM_CODE_START_LOCAL(paranoid_exit)
IBRS_EXIT save_reg=%r15
/*
* The order of operations is important. RESTORE_CR3 requires
* The order of operations is important. PARANOID_RESTORE_CR3 requires
* kernel GSBASE.
*
* NB to anyone to try to optimize this code: this code does
* not execute at all for exceptions from user mode. Those
* exceptions go through error_return instead.
*/
RESTORE_CR3 scratch_reg=%rax save_reg=%r14
PARANOID_RESTORE_CR3 scratch_reg=%rax save_reg=%r14
/* Handle the three GSBASE cases */
ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
@ -1404,8 +1404,7 @@ end_repeat_nmi:
/* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
IBRS_EXIT save_reg=%r15
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
PARANOID_RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
/*
* The above invocation of paranoid_entry stored the GSBASE