From 51005a59bcbe1add8802105437b3707ea257f2ea Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 15 May 2024 13:08:28 +0100
Subject: [PATCH 1/7] lkdtm/bugs: add test for hung smp_call_function_single()

The CONFIG_CSD_LOCK_WAIT_DEBUG option enables debugging of hung
smp_call_function*() calls (e.g. when the target CPU gets stuck within
the callback function). Testing this option requires triggering such
hangs.

This patch adds an lkdtm test with a hung smp_call_function_single()
callback, which can be used to test CONFIG_CSD_LOCK_WAIT_DEBUG and NMI
backtraces (as CONFIG_CSD_LOCK_WAIT_DEBUG will attempt an NMI backtrace
of the hung target CPU).

On arm64 using pseudo-NMI, this looks like:

| # mount -t debugfs none /sys/kernel/debug/
| # echo SMP_CALL_LOCKUP > /sys/kernel/debug/provoke-crash/DIRECT
| lkdtm: Performing direct entry SMP_CALL_LOCKUP
| smp: csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 5000000176 ns for CPU#00 __lkdtm_SMP_CALL_LOCKUP+0x0/0x8(0x0).
| smp: csd: CSD lock (#1) handling this request.
| Sending NMI from CPU 1 to CPUs 0:
| NMI backtrace for cpu 0
| CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.9.0-rc4-00001-gfdfd281212ec #1
| Hardware name: linux,dummy-virt (DT)
| pstate: 60401005 (nZCv daif +PAN -UAO -TCO -DIT +SSBS BTYPE=--)
| pc : __lkdtm_SMP_CALL_LOCKUP+0x0/0x8
| lr : __flush_smp_call_function_queue+0x1b0/0x290
| sp : ffff800080003f30
| pmr_save: 00000060
| x29: ffff800080003f30 x28: ffffa4ce961a4900 x27: 0000000000000000
| x26: fff000003fcfa0c0 x25: ffffa4ce961a4900 x24: ffffa4ce959aa140
| x23: ffffa4ce959aa140 x22: 0000000000000000 x21: ffff800080523c40
| x20: 0000000000000000 x19: 0000000000000000 x18: fff05b31aa323000
| x17: fff05b31aa323000 x16: ffff800080000000 x15: 0000330fc3fe6b2c
| x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000279
| x11: 0000000000000040 x10: fff000000302d0a8 x9 : fff000000302d0a0
| x8 : fff0000003400270 x7 : 0000000000000000 x6 : ffffa4ce9451b810
| x5 : 0000000000000000 x4 : fff05b31aa323000 x3 : ffff800080003f30
| x2 : fff05b31aa323000 x1 : ffffa4ce959aa140 x0 : 0000000000000000
| Call trace:
|  __lkdtm_SMP_CALL_LOCKUP+0x0/0x8
|  generic_smp_call_function_single_interrupt+0x14/0x20
|  ipi_handler+0xb8/0x178
|  handle_percpu_devid_irq+0x84/0x130
|  generic_handle_domain_irq+0x2c/0x44
|  gic_handle_irq+0x118/0x240
|  call_on_irq_stack+0x24/0x4c
|  do_interrupt_handler+0x80/0x84
|  el1_interrupt+0x44/0xc0
|  el1h_64_irq_handler+0x18/0x24
|  el1h_64_irq+0x78/0x7c
|  default_idle_call+0x40/0x60
|  do_idle+0x23c/0x2d0
|  cpu_startup_entry+0x38/0x3c
|  kernel_init+0x0/0x1d8
|  start_kernel+0x51c/0x608
|  __primary_switched+0x80/0x88
| CPU: 1 PID: 128 Comm: sh Not tainted 6.9.0-rc4-00001-gfdfd281212ec #1
| Hardware name: linux,dummy-virt (DT)
| Call trace:
|  dump_backtrace+0x90/0xe8
|  show_stack+0x18/0x24
|  dump_stack_lvl+0xac/0xe8
|  dump_stack+0x18/0x24
|  csd_lock_wait_toolong+0x268/0x338
|  smp_call_function_single+0x1dc/0x2f0
|  lkdtm_SMP_CALL_LOCKUP+0xcc/0xfc
|  lkdtm_do_action+0x1c/0x38
|  direct_entry+0xbc/0x14c
|  full_proxy_write+0x60/0xb4
|  vfs_write+0xd0/0x35c
|  ksys_write+0x70/0x104
|  __arm64_sys_write+0x1c/0x28
|  invoke_syscall+0x48/0x114
|  el0_svc_common.constprop.0+0x40/0xe0
|  do_el0_svc+0x1c/0x28
|  el0_svc+0x38/0x108
|  el0t_64_sync_handler+0x120/0x12c
|  el0t_64_sync+0x1a4/0x1a8
| smp: csd: Continued non-responsive CSD lock (#1) on CPU#1, waiting 10000064272 ns for CPU#00 __lkdtm_SMP_CALL_LOCKUP+0x0/0x8(0x0).
| smp: csd: CSD lock (#1) handling this request.
| smp: csd: Continued non-responsive CSD lock (#1) on CPU#1, waiting 15000064384 ns for CPU#00 __lkdtm_SMP_CALL_LOCKUP+0x0/0x8(0x0).
| smp: csd: CSD lock (#1) handling this request.

Signed-off-by: Mark Rutland
Acked-by: Paul E. McKenney
Cc: Kees Cook
Link: https://lore.kernel.org/r/20240515120828.375585-1-mark.rutland@arm.com
Signed-off-by: Kees Cook
---
 drivers/misc/lkdtm/bugs.c               | 30 +++++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt |  1 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 5178c02b21eb..62ba01525479 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -286,6 +286,35 @@ static void lkdtm_HARDLOCKUP(void)
 		cpu_relax();
 }
 
+static void __lkdtm_SMP_CALL_LOCKUP(void *unused)
+{
+	for (;;)
+		cpu_relax();
+}
+
+static void lkdtm_SMP_CALL_LOCKUP(void)
+{
+	unsigned int cpu, target;
+
+	cpus_read_lock();
+
+	cpu = get_cpu();
+	target = cpumask_any_but(cpu_online_mask, cpu);
+
+	if (target >= nr_cpu_ids) {
+		pr_err("FAIL: no other online CPUs\n");
+		goto out_put_cpus;
+	}
+
+	smp_call_function_single(target, __lkdtm_SMP_CALL_LOCKUP, NULL, 1);
+
+	pr_err("FAIL: did not hang\n");
+
+out_put_cpus:
+	put_cpu();
+	cpus_read_unlock();
+}
+
 static void lkdtm_SPINLOCKUP(void)
 {
 	/* Must be called twice to trigger. */
@@ -680,6 +709,7 @@ static struct crashtype crashtypes[] = {
 	CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
 	CRASHTYPE(SOFTLOCKUP),
 	CRASHTYPE(HARDLOCKUP),
+	CRASHTYPE(SMP_CALL_LOCKUP),
 	CRASHTYPE(SPINLOCKUP),
 	CRASHTYPE(HUNG_TASK),
 	CRASHTYPE(OVERFLOW_SIGNED),
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 368973f05250..cff124c1eddd 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -31,6 +31,7 @@ SLAB_FREE_CROSS
 SLAB_FREE_PAGE
 #SOFTLOCKUP Hangs the system
 #HARDLOCKUP Hangs the system
+#SMP_CALL_LOCKUP Hangs the system
 #SPINLOCKUP Hangs the system
 #HUNG_TASK Hangs the system
 EXEC_DATA
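Why the issuing CPU hangs as well: the test passes wait=1, so the caller
spins until the callback finishes on the target CPU, which never happens.
A distilled sketch of that interaction (an annotated excerpt of the call
above; the comments are illustrative, not kernel documentation):

	/*
	 * wait == 1: smp_call_function_single() does not return until
	 * __lkdtm_SMP_CALL_LOCKUP() has completed on the target CPU.
	 */
	smp_call_function_single(target, __lkdtm_SMP_CALL_LOCKUP, NULL, 1);
	/*
	 * Never reached: the callback loops forever, so the CSD lock is
	 * never released and the caller ends up reporting the hang via
	 * csd_lock_wait_toolong() (visible in the backtrace above).
	 */
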
From d6f635bcaca8d38dfa47ee20658705f9eff156b5 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 30 Apr 2024 17:02:22 -0700
Subject: [PATCH 2/7] x86/alternatives: Make FineIBT mode Kconfig selectable

Since FineIBT performs checking at the destination, it is weaker against
attacks that can construct arbitrary executable memory contents. As such,
some system builders want to run with FineIBT disabled by default. Allow
the "cfi=kcfi" boot param mode to be selectable through Kconfig via the
newly introduced CONFIG_CFI_AUTO_DEFAULT.

Reviewed-by: Sami Tolvanen
Reviewed-by: Nathan Chancellor
Tested-by: Nathan Chancellor
Link: https://lore.kernel.org/r/20240501000218.work.998-kees@kernel.org
Signed-off-by: Kees Cook
---
 arch/x86/Kconfig              | 9 +++++++++
 arch/x86/include/asm/cfi.h    | 2 +-
 arch/x86/kernel/alternative.c | 8 ++++----
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d7122a1883e..56e301921d2a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2427,6 +2427,15 @@ config STRICT_SIGALTSTACK_SIZE
 
 	  Say 'N' unless you want to really enforce this check.
 
+config CFI_AUTO_DEFAULT
+	bool "Attempt to use FineIBT by default at boot time"
+	depends on FINEIBT
+	default y
+	help
+	  Attempt to use FineIBT by default at boot time. If enabled,
+	  this is the same as booting with "cfi=auto". If disabled,
+	  this is the same as booting with "cfi=kcfi".
+
 source "kernel/livepatch/Kconfig"
 
 endmenu
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 7cd752557905..31d19c815f99 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -93,7 +93,7 @@
  *
  */
 enum cfi_mode {
-	CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */
+	CFI_AUTO,    /* FineIBT if hardware has IBT, otherwise kCFI */
 	CFI_OFF,     /* Taditional / IBT depending on .config */
 	CFI_KCFI,    /* Optionally CALL_PADDING, IBT, RETPOLINE */
 	CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 89de61243272..7fcba437abae 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -885,8 +885,8 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
 
 #endif /* CONFIG_X86_KERNEL_IBT */
 
-#ifdef CONFIG_FINEIBT
-#define __CFI_DEFAULT CFI_DEFAULT
+#ifdef CONFIG_CFI_AUTO_DEFAULT
+#define __CFI_DEFAULT CFI_AUTO
 #elif defined(CONFIG_CFI_CLANG)
 #define __CFI_DEFAULT CFI_KCFI
 #else
@@ -994,7 +994,7 @@ static __init int cfi_parse_cmdline(char *str)
 		}
 
 		if (!strcmp(str, "auto")) {
-			cfi_mode = CFI_DEFAULT;
+			cfi_mode = CFI_AUTO;
 		} else if (!strcmp(str, "off")) {
 			cfi_mode = CFI_OFF;
 			cfi_rand = false;
@@ -1254,7 +1254,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
 		 "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
 		return;
 
-	if (cfi_mode == CFI_DEFAULT) {
+	if (cfi_mode == CFI_AUTO) {
 		cfi_mode = CFI_KCFI;
 		if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
 			cfi_mode = CFI_FINEIBT;
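For quick reference, the resulting boot-parameter behavior, summarizing
the commit message and the cfi_parse_cmdline() hunk above (the
annotations are illustrative):

	cfi=auto	/* CFI_AUTO: FineIBT if the CPU has IBT, else kCFI */
	cfi=kcfi	/* CFI_KCFI: always check at the call site with kCFI */
	cfi=off		/* CFI_OFF: no CFI checking */

CONFIG_CFI_AUTO_DEFAULT=y makes "cfi=auto" the default; disabling it makes
"cfi=kcfi" the default. An explicit cfi= parameter overrides either.
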
From 2003e483a81cc235e29f77da3f6b256cb4b348e7 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Wed, 19 Jun 2024 13:31:05 -0700
Subject: [PATCH 3/7] fortify: Do not special-case 0-sized destinations

All fake flexible arrays should have been removed now, so remove the
special casing that was avoiding checking them. If a destination claims
to be 0 sized, believe it. This is especially important for cases where
__counted_by is in use and may have a 0 element count.

Link: https://lore.kernel.org/r/20240619203105.work.747-kees@kernel.org
Signed-off-by: Kees Cook
---
 include/linux/fortify-string.h | 8 ++------
 lib/fortify_kunit.c            | 3 +--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 7e0f340bf363..0d99bf11d260 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -601,11 +601,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 	/*
 	 * Warn when writing beyond destination field size.
 	 *
-	 * We must ignore p_size_field == 0 for existing 0-element
-	 * fake flexible arrays, until they are all converted to
-	 * proper flexible arrays.
-	 *
-	 * The implementation of __builtin_*object_size() behaves
+	 * Note the implementation of __builtin_*object_size() behaves
 	 * like sizeof() when not directly referencing a flexible
 	 * array member, which means there will be many bounds checks
 	 * that will appear at run-time, without a way for them to be
@@ -613,7 +609,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 	 * is specifically the flexible array member).
 	 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832
 	 */
-	if (p_size_field != 0 && p_size_field != SIZE_MAX &&
+	if (p_size_field != SIZE_MAX &&
 	    p_size != p_size_field && p_size_field < size)
 		return true;
 
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index f9cc467334ce..f0c64b9e9b46 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -910,10 +910,9 @@ static void fortify_test_##memfunc(struct kunit *test)	\
 	memfunc(zero.buf, srcB, 0 + unconst);			\
 	KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);	\
 	KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);	\
-	/* We currently explicitly ignore zero-sized dests. */	\
 	memfunc(zero.buf, srcB, 1 + unconst);			\
 	KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);	\
-	KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);	\
+	KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);	\
 }								\
 __fortify_test(memcpy)
 __fortify_test(memmove)
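To make the __counted_by case concrete, here is a sketch of the pattern
this now catches (the struct and function names are hypothetical, not
from the patch):

	struct msg {
		int len;
		u8 data[] __counted_by(len);
	};

	static void msg_fill(struct msg *m, const u8 *src, size_t n)
	{
		/*
		 * When m->len == 0, the destination is genuinely 0 sized;
		 * a copy with n > 0 is now reported as a write overflow
		 * instead of being skipped by the old p_size_field == 0
		 * special case.
		 */
		memcpy(m->data, src, n);
	}
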
From ef40d28f17bd384d7e0b630c7d83f108a526351b Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Wed, 19 Jun 2024 14:47:15 -0700
Subject: [PATCH 4/7] randomize_kstack: Remove non-functional per-arch entropy filtering

An unintended consequence of commit 9c573cd31343 ("randomize_kstack:
Improve entropy diffusion") was that the per-architecture entropy size
filtering reduced how many bits were being added to the mix, rather than
how many bits were being used during the offsetting. All architectures
fell back to the existing default of 0x3FF (10 bits), which will consume
at most 1KiB of stack space.

It seems that this is working just fine, so let's avoid the confusion
and update everything to use the default.

The prior intent of the per-architecture limits was:

  arm64:   capped at 0x1FF (9 bits), 5 bits effective
  powerpc: uncapped (10 bits), 6 or 7 bits effective
  riscv:   uncapped (10 bits), 6 bits effective
  x86:     capped at 0xFF (8 bits), 5 (x86_64) or 6 (ia32) bits effective
  s390:    capped at 0xFF (8 bits), undocumented effective entropy

Current discussion has led to just dropping the original per-architecture
filters. The additional entropy appears to be safe for arm64, x86, and
s390. Quoting Arnd, "There is no point pretending that 15.75KB is somehow
safe to use while 15.00KB is not."

Co-developed-by: Yuntao Liu
Signed-off-by: Yuntao Liu
Fixes: 9c573cd31343 ("randomize_kstack: Improve entropy diffusion")
Link: https://lore.kernel.org/r/20240617133721.377540-1-liuyuntao12@huawei.com
Reviewed-by: Arnd Bergmann
Acked-by: Mark Rutland
Acked-by: Heiko Carstens # s390
Link: https://lore.kernel.org/r/20240619214711.work.953-kees@kernel.org
Signed-off-by: Kees Cook
---
 arch/arm64/kernel/syscall.c          | 16 +++++++---------
 arch/s390/include/asm/entry-common.h |  2 +-
 arch/x86/include/asm/entry-common.h  | 15 ++++++---------
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad198262b981..7230f6e20ab8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 	syscall_set_return_value(current, regs, 0, ret);
 
 	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * but not enough for arm64 stack utilization comfort. To keep
-	 * reasonable stack head room, reduce the maximum offset to 9 bits.
+	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+	 * bits. The actual entropy will be further reduced by the compiler
+	 * when applying stack alignment constraints: the AAPCS mandates a
+	 * 16-byte aligned SP at function boundaries, which will remove the
+	 * 4 low bits from any entropy chosen here.
 	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints: the AAPCS mandates a
-	 * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
-	 *
-	 * The resulting 5 bits of entropy is seen in SP[8:4].
+	 * The resulting 6 bits of entropy is seen in SP[9:4].
 	 */
-	choose_random_kstack_offset(get_random_u16() & 0x1FF);
+	choose_random_kstack_offset(get_random_u16());
 }
 
 static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 7f5004065e8a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void)
 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 						  unsigned long ti_work)
 {
-	choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+	choose_random_kstack_offset(get_tod_clock_fast());
 }
 
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..fb2809b20b0a 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 #endif
 
 	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * but not enough for x86 stack utilization comfort. To keep
-	 * reasonable stack head room, reduce the maximum offset to 8 bits.
-	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints (see cc_stack_align4/8 in
+	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+	 * bits. The actual entropy will be further reduced by the compiler
+	 * when applying stack alignment constraints (see cc_stack_align4/8 in
 	 * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
 	 * low bits from any entropy chosen here.
 	 *
-	 * Therefore, final stack offset entropy will be 5 (x86_64) or
-	 * 6 (ia32) bits.
+	 * Therefore, final stack offset entropy will be 7 (x86_64) or
+	 * 8 (ia32) bits.
 	 */
-	choose_random_kstack_offset(rdtsc() & 0xFF);
+	choose_random_kstack_offset(rdtsc());
 }
 
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
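As a worked example of the arm64 result, using only values from the
updated comment above (illustrative, not additional kernel code):

	u32 offset = get_random_u16();	/* raw entropy, up to 16 bits */
	offset &= 0x3FF;		/* KSTACK_OFFSET_MAX(): capped to 10 bits */
	/*
	 * AAPCS 16-byte SP alignment then clears bits [3:0], leaving
	 * 6 bits of effective entropy in SP[9:4]. The old "& 0x1FF"
	 * discarded a bit before the cap was even applied.
	 */
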
From 18c18b1ff6c648ea62571554dfd698110757f894 Mon Sep 17 00:00:00 2001
From: Thorsten Blum
Date: Sun, 30 Jun 2024 01:36:09 +0200
Subject: [PATCH 5/7] gcc-plugins: Remove duplicate included header file stringpool.h

The header file stringpool.h is included once for GCC version >= 8 and
then again unconditionally for all versions. Since stringpool.h was
added in GCC 4.9 and the kernel currently requires GCC 5.1 as a minimum,
remove the conditional include. Including the header file only once
removes the following warning reported by make includecheck:

  stringpool.h is included more than once

However, it's important to include stringpool.h before attribs.h,
because attribs.h uses some of its functions. Compile-tested with
GCC 14.

Signed-off-by: Thorsten Blum
Link: https://lore.kernel.org/r/20240629233608.278028-2-thorsten.blum@toblux.com
Signed-off-by: Kees Cook
---
 scripts/gcc-plugins/gcc-common.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 1ae39b9f4a95..3222c1070444 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -62,11 +62,7 @@
 #include "pass_manager.h"
 #include "predict.h"
 #include "ipa-utils.h"
-
-#if BUILDING_GCC_VERSION >= 8000
 #include "stringpool.h"
-#endif
-
 #include "attribs.h"
 #include "varasm.h"
 #include "stor-layout.h"
@@ -78,7 +74,6 @@
 #include "context.h"
 #include "tree-ssa-alias.h"
 #include "tree-ssa.h"
-#include "stringpool.h"
 #if BUILDING_GCC_VERSION >= 7000
 #include "tree-vrp.h"
 #endif
From 3ccea4784fddd96fbd6c4497eb28b45dab638c2a Mon Sep 17 00:00:00 2001
From: Yanjun Yang
Date: Tue, 11 Jun 2024 18:09:47 +0800
Subject: [PATCH 6/7] ARM: Remove address checking for MMUless devices

Commit 169f9102f9198b ("ARM: 9350/1: fault: Implement
copy_from_kernel_nofault_allowed()") added this function to check
addresses before use. However, for devices without an MMU, the
addr >= TASK_SIZE check will always fail. This patch moves the
function inside the #ifdef CONFIG_MMU block.

Signed-off-by: Yanjun Yang
Acked-by: Ard Biesheuvel
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218953
Fixes: 169f9102f9198b ("ARM: 9350/1: fault: Implement copy_from_kernel_nofault_allowed()")
Link: https://lore.kernel.org/r/20240611100947.32241-1-yangyj.ee@gmail.com
Signed-off-by: Kees Cook
---
 arch/arm/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 67c425341a95..ab01b51de559 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -25,6 +25,8 @@
 
 #include "fault.h"
 
+#ifdef CONFIG_MMU
+
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 {
 	unsigned long addr = (unsigned long)unsafe_src;
@@ -32,8 +34,6 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 	return addr >= TASK_SIZE && ULONG_MAX - addr >= size;
 }
 
-#ifdef CONFIG_MMU
-
 /*
  * This is useful to dump out the page tables associated with
  * 'addr' in mm 'mm'.
From 872bb37f6829d4f7f3ed5afe2786add3d4384b4b Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 2 Jul 2024 14:16:16 -0700
Subject: [PATCH 7/7] randomize_kstack: Improve stack alignment codegen

The codegen for adding architecture-specific stack alignment to the
effective alloca() usage is somewhat inefficient and allows a bit to get
carried beyond the desired entropy range. This isn't really a problem,
but it's unexpected and the codegen is kind of bad.

Quoting Mark[1], the disassembly for arm64's invoke_syscall() looks like:

	// offset = raw_cpu_read(kstack_offset)
	mov	x4, sp
	adrp	x0, kstack_offset
	mrs	x5, tpidr_el1
	add	x0, x0, #:lo12:kstack_offset
	ldr	w0, [x0, x5]
	// offset = KSTACK_OFFSET_MAX(offset)
	and	x0, x0, #0x3ff
	// alloca(offset)
	add	x0, x0, #0xf
	and	x0, x0, #0x7f0
	sub	sp, x4, x0

... which in C would be:

	offset = raw_cpu_read(kstack_offset)
	offset &= 0x3ff;		// [0x0, 0x3ff]

	offset += 0xf;			// [0xf, 0x40e]
	offset &= 0x7f0;		// [0x0, 0x400]

... so when *all* bits [3:0] are 0, they'll have no impact, and when
*any* of bits [3:0] are 1 they'll trigger a carry into bit 4, which
could ripple all the way up and spill into bit 10.
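A concrete instance of that carry, with illustrative values plugged into
the C above:

	offset = 0x3ff;			// maximum after the 10-bit mask
	offset += 0xf;			// 0x40e
	offset &= 0x7f0;		// 0x400: bit 10 is set, one step
					// past the intended [0x0, 0x3ff]
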
Switch the masking in KSTACK_OFFSET_MAX() to explicitly clear the bottom
bits to avoid the rounding, by using 0b1111110000 instead of 0b1111111111:

	// offset = raw_cpu_read(kstack_offset)
	mov	x4, sp
	adrp	x0, 0
	mrs	x5, tpidr_el1
	add	x0, x0, #:lo12:kstack_offset
	ldr	w0, [x0, x5]
	// offset = KSTACK_OFFSET_MAX(offset)
	and	x0, x0, #0x3f0
	// alloca(offset)
	sub	sp, x4, x0

Suggested-by: Mark Rutland
Link: https://lore.kernel.org/lkml/ZnVfOnIuFl2kNWkT@J2N7QTR9R3/ [1]
Link: https://lore.kernel.org/r/20240702211612.work.576-kees@kernel.org
Signed-off-by: Kees Cook
---
 include/linux/randomize_kstack.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 6d92b68efbf6..1d982dbdd0d0 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -32,13 +32,19 @@ DECLARE_PER_CPU(u32, kstack_offset);
 #endif
 
 /*
- * Use, at most, 10 bits of entropy. We explicitly cap this to keep the
- * "VLA" from being unbounded (see above). 10 bits leaves enough room for
- * per-arch offset masks to reduce entropy (by removing higher bits, since
- * high entropy may overly constrain usable stack space), and for
- * compiler/arch-specific stack alignment to remove the lower bits.
+ * Use, at most, 6 bits of entropy (on 64-bit; 8 on 32-bit). This cap is
+ * to keep the "VLA" from being unbounded (see above). Additionally clear
+ * the bottom 4 bits (on 64-bit systems, 2 for 32-bit), since stack
+ * alignment will always be at least word size. This makes the compiler
+ * code gen better when it is applying the actual per-arch alignment to
+ * the final offset. The resulting randomness is reasonable without overly
+ * constraining usable stack space.
  */
-#define KSTACK_OFFSET_MAX(x)	((x) & 0x3FF)
+#ifdef CONFIG_64BIT
+#define KSTACK_OFFSET_MAX(x)	((x) & 0b1111110000)
+#else
+#define KSTACK_OFFSET_MAX(x)	((x) & 0b1111111100)
+#endif
 
 /**
  * add_random_kstack_offset - Increase stack utilization by previously
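To sanity-check the two masking schemes, here is a standalone userspace
sketch (not kernel code; illustrative only) that exhaustively compares
the offset ranges on 64-bit:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t old_max = 0, new_max = 0;

		for (uint32_t x = 0; x <= 0x3ff; x++) {
			/* old: 10-bit cap, then the compiler's 16-byte rounding */
			uint32_t old = ((x & 0x3ff) + 0xf) & 0x7f0;
			/* new: pre-aligned 6-bit cap, no rounding needed */
			uint32_t new = x & 0x3f0;

			if (old > old_max)
				old_max = old;
			if (new > new_max)
				new_max = new;
		}
		/* prints: old max 0x400, new max 0x3f0 */
		printf("old max 0x%x, new max 0x%x\n", old_max, new_max);
		return 0;
	}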