From 51005a59bcbe1add8802105437b3707ea257f2ea Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 15 May 2024 13:08:28 +0100
Subject: [PATCH 1/7] lkdtm/bugs: add test for hung smp_call_function_single()

The CONFIG_CSD_LOCK_WAIT_DEBUG option enables debugging of hung
smp_call_function*() calls (e.g. when the target CPU gets stuck within
the callback function). Testing this option requires triggering such
hangs.

This patch adds an lkdtm test with a hung smp_call_function_single()
callback, which can be used to test CONFIG_CSD_LOCK_WAIT_DEBUG and NMI
backtraces (as CONFIG_CSD_LOCK_WAIT_DEBUG will attempt an NMI backtrace
of the hung target CPU).

On arm64 using pseudo-NMI, this looks like:

| # mount -t debugfs none /sys/kernel/debug/
| # echo SMP_CALL_LOCKUP > /sys/kernel/debug/provoke-crash/DIRECT
| lkdtm: Performing direct entry SMP_CALL_LOCKUP
| smp: csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 5000000176 ns for CPU#00 __lkdtm_SMP_CALL_LOCKUP+0x0/0x8(0x0).
| smp: csd: CSD lock (#1) handling this request.
| Sending NMI from CPU 1 to CPUs 0:
| NMI backtrace for cpu 0
| CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.9.0-rc4-00001-gfdfd281212ec #1
| Hardware name: linux,dummy-virt (DT)
| pstate: 60401005 (nZCv daif +PAN -UAO -TCO -DIT +SSBS BTYPE=--)
| pc : __lkdtm_SMP_CALL_LOCKUP+0x0/0x8
| lr : __flush_smp_call_function_queue+0x1b0/0x290
| sp : ffff800080003f30
| pmr_save: 00000060
| x29: ffff800080003f30 x28: ffffa4ce961a4900 x27: 0000000000000000
| x26: fff000003fcfa0c0 x25: ffffa4ce961a4900 x24: ffffa4ce959aa140
| x23: ffffa4ce959aa140 x22: 0000000000000000 x21: ffff800080523c40
| x20: 0000000000000000 x19: 0000000000000000 x18: fff05b31aa323000
| x17: fff05b31aa323000 x16: ffff800080000000 x15: 0000330fc3fe6b2c
| x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000279
| x11: 0000000000000040 x10: fff000000302d0a8 x9 : fff000000302d0a0
| x8 : fff0000003400270 x7 : 0000000000000000 x6 : ffffa4ce9451b810
| x5 : 0000000000000000 x4 : fff05b31aa323000 x3 : ffff800080003f30
| x2 : fff05b31aa323000 x1 : ffffa4ce959aa140 x0 : 0000000000000000
| Call trace:
|  __lkdtm_SMP_CALL_LOCKUP+0x0/0x8
|  generic_smp_call_function_single_interrupt+0x14/0x20
|  ipi_handler+0xb8/0x178
|  handle_percpu_devid_irq+0x84/0x130
|  generic_handle_domain_irq+0x2c/0x44
|  gic_handle_irq+0x118/0x240
|  call_on_irq_stack+0x24/0x4c
|  do_interrupt_handler+0x80/0x84
|  el1_interrupt+0x44/0xc0
|  el1h_64_irq_handler+0x18/0x24
|  el1h_64_irq+0x78/0x7c
|  default_idle_call+0x40/0x60
|  do_idle+0x23c/0x2d0
|  cpu_startup_entry+0x38/0x3c
|  kernel_init+0x0/0x1d8
|  start_kernel+0x51c/0x608
|  __primary_switched+0x80/0x88
| CPU: 1 PID: 128 Comm: sh Not tainted 6.9.0-rc4-00001-gfdfd281212ec #1
| Hardware name: linux,dummy-virt (DT)
| Call trace:
|  dump_backtrace+0x90/0xe8
|  show_stack+0x18/0x24
|  dump_stack_lvl+0xac/0xe8
|  dump_stack+0x18/0x24
|  csd_lock_wait_toolong+0x268/0x338
|  smp_call_function_single+0x1dc/0x2f0
|  lkdtm_SMP_CALL_LOCKUP+0xcc/0xfc
|  lkdtm_do_action+0x1c/0x38
|  direct_entry+0xbc/0x14c
|  full_proxy_write+0x60/0xb4
|  vfs_write+0xd0/0x35c
|  ksys_write+0x70/0x104
|  __arm64_sys_write+0x1c/0x28
|  invoke_syscall+0x48/0x114
|  el0_svc_common.constprop.0+0x40/0xe0
|  do_el0_svc+0x1c/0x28
|  el0_svc+0x38/0x108
|  el0t_64_sync_handler+0x120/0x12c
|  el0t_64_sync+0x1a4/0x1a8
| smp: csd: Continued non-responsive CSD lock (#1) on CPU#1, waiting 10000064272 ns for CPU#00 __lkdtm_SMP_CALL_LOCKUP+0x0/0x8(0x0).
| smp: csd: CSD lock (#1) handling this request.
| smp: csd: Continued non-responsive CSD lock (#1) on CPU#1, waiting 15000064384 ns for CPU#00 __lkdtm_SMP_CALL_LOCKUP+0x0/0x8(0x0).
| smp: csd: CSD lock (#1) handling this request.

Signed-off-by: Mark Rutland
Acked-by: Paul E. McKenney
Cc: Kees Cook
Link: https://lore.kernel.org/r/20240515120828.375585-1-mark.rutland@arm.com
Signed-off-by: Kees Cook
---
 drivers/misc/lkdtm/bugs.c               | 30 +++++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt |  1 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 5178c02b21eb..62ba01525479 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -286,6 +286,35 @@ static void lkdtm_HARDLOCKUP(void)
 		cpu_relax();
 }
 
+static void __lkdtm_SMP_CALL_LOCKUP(void *unused)
+{
+	for (;;)
+		cpu_relax();
+}
+
+static void lkdtm_SMP_CALL_LOCKUP(void)
+{
+	unsigned int cpu, target;
+
+	cpus_read_lock();
+
+	cpu = get_cpu();
+	target = cpumask_any_but(cpu_online_mask, cpu);
+
+	if (target >= nr_cpu_ids) {
+		pr_err("FAIL: no other online CPUs\n");
+		goto out_put_cpus;
+	}
+
+	smp_call_function_single(target, __lkdtm_SMP_CALL_LOCKUP, NULL, 1);
+
+	pr_err("FAIL: did not hang\n");
+
+out_put_cpus:
+	put_cpu();
+	cpus_read_unlock();
+}
+
 static void lkdtm_SPINLOCKUP(void)
 {
 	/* Must be called twice to trigger. */
@@ -680,6 +709,7 @@ static struct crashtype crashtypes[] = {
 	CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
 	CRASHTYPE(SOFTLOCKUP),
 	CRASHTYPE(HARDLOCKUP),
+	CRASHTYPE(SMP_CALL_LOCKUP),
 	CRASHTYPE(SPINLOCKUP),
 	CRASHTYPE(HUNG_TASK),
 	CRASHTYPE(OVERFLOW_SIGNED),
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 368973f05250..cff124c1eddd 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -31,6 +31,7 @@ SLAB_FREE_CROSS
 SLAB_FREE_PAGE
 #SOFTLOCKUP Hangs the system
 #HARDLOCKUP Hangs the system
+#SMP_CALL_LOCKUP Hangs the system
 #SPINLOCKUP Hangs the system
 #HUNG_TASK Hangs the system
 EXEC_DATA
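Why the issuing CPU hangs as well: the test passes wait=1, so the caller
spins until the callback finishes on the target CPU, which never happens.
A distilled sketch of that interaction (an annotated excerpt of the call
above; the comments are illustrative, not kernel documentation):

	/*
	 * wait == 1: smp_call_function_single() does not return until
	 * __lkdtm_SMP_CALL_LOCKUP() has completed on the target CPU.
	 */
	smp_call_function_single(target, __lkdtm_SMP_CALL_LOCKUP, NULL, 1);
	/*
	 * Never reached: the callback loops forever, so the CSD lock is
	 * never released and the caller ends up reporting the hang via
	 * csd_lock_wait_toolong() (visible in the backtrace above).
	 */
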
From d6f635bcaca8d38dfa47ee20658705f9eff156b5 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 30 Apr 2024 17:02:22 -0700
Subject: [PATCH 2/7] x86/alternatives: Make FineIBT mode Kconfig selectable

Since FineIBT performs checking at the destination, it is weaker against
attacks that can construct arbitrary executable memory contents. As such,
some system builders want to run with FineIBT disabled by default. Allow
the "cfi=kcfi" boot param mode to be selectable through Kconfig via the
newly introduced CONFIG_CFI_AUTO_DEFAULT.

Reviewed-by: Sami Tolvanen
Reviewed-by: Nathan Chancellor
Tested-by: Nathan Chancellor
Link: https://lore.kernel.org/r/20240501000218.work.998-kees@kernel.org
Signed-off-by: Kees Cook
---
 arch/x86/Kconfig              | 9 +++++++++
 arch/x86/include/asm/cfi.h    | 2 +-
 arch/x86/kernel/alternative.c | 8 ++++----
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d7122a1883e..56e301921d2a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2427,6 +2427,15 @@ config STRICT_SIGALTSTACK_SIZE
 
 	  Say 'N' unless you want to really enforce this check.
 
+config CFI_AUTO_DEFAULT
+	bool "Attempt to use FineIBT by default at boot time"
+	depends on FINEIBT
+	default y
+	help
+	  Attempt to use FineIBT by default at boot time. If enabled,
+	  this is the same as booting with "cfi=auto". If disabled,
+	  this is the same as booting with "cfi=kcfi".
+
 source "kernel/livepatch/Kconfig"
 
 endmenu
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 7cd752557905..31d19c815f99 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -93,7 +93,7 @@
  *
  */
 enum cfi_mode {
-	CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */
+	CFI_AUTO,    /* FineIBT if hardware has IBT, otherwise kCFI */
 	CFI_OFF,     /* Taditional / IBT depending on .config */
 	CFI_KCFI,    /* Optionally CALL_PADDING, IBT, RETPOLINE */
 	CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 89de61243272..7fcba437abae 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -885,8 +885,8 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
 
 #endif /* CONFIG_X86_KERNEL_IBT */
 
-#ifdef CONFIG_FINEIBT
-#define __CFI_DEFAULT CFI_DEFAULT
+#ifdef CONFIG_CFI_AUTO_DEFAULT
+#define __CFI_DEFAULT CFI_AUTO
 #elif defined(CONFIG_CFI_CLANG)
 #define __CFI_DEFAULT CFI_KCFI
 #else
@@ -994,7 +994,7 @@ static __init int cfi_parse_cmdline(char *str)
 		}
 
 		if (!strcmp(str, "auto")) {
-			cfi_mode = CFI_DEFAULT;
+			cfi_mode = CFI_AUTO;
 		} else if (!strcmp(str, "off")) {
 			cfi_mode = CFI_OFF;
 			cfi_rand = false;
@@ -1254,7 +1254,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
 		 "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
 		return;
 
-	if (cfi_mode == CFI_DEFAULT) {
+	if (cfi_mode == CFI_AUTO) {
 		cfi_mode = CFI_KCFI;
 		if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
 			cfi_mode = CFI_FINEIBT;
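For quick reference, the resulting boot-parameter behavior, summarizing
the commit message and the cfi_parse_cmdline() hunk above (the
annotations are illustrative):

	cfi=auto	/* CFI_AUTO: FineIBT if the CPU has IBT, else kCFI */
	cfi=kcfi	/* CFI_KCFI: always check at the call site with kCFI */
	cfi=off		/* CFI_OFF: no CFI checking */

CONFIG_CFI_AUTO_DEFAULT=y makes "cfi=auto" the default; disabling it makes
"cfi=kcfi" the default. An explicit cfi= parameter overrides either.
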
From 2003e483a81cc235e29f77da3f6b256cb4b348e7 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Wed, 19 Jun 2024 13:31:05 -0700
Subject: [PATCH 3/7] fortify: Do not special-case 0-sized destinations

All fake flexible arrays should have been removed now, so remove the
special casing that was avoiding checking them. If a destination claims
to be 0 sized, believe it. This is especially important for cases where
__counted_by is in use and may have a 0 element count.

Link: https://lore.kernel.org/r/20240619203105.work.747-kees@kernel.org
Signed-off-by: Kees Cook
---
 include/linux/fortify-string.h | 8 ++------
 lib/fortify_kunit.c            | 3 +--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 7e0f340bf363..0d99bf11d260 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -601,11 +601,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 	/*
 	 * Warn when writing beyond destination field size.
 	 *
-	 * We must ignore p_size_field == 0 for existing 0-element
-	 * fake flexible arrays, until they are all converted to
-	 * proper flexible arrays.
-	 *
-	 * The implementation of __builtin_*object_size() behaves
+	 * Note the implementation of __builtin_*object_size() behaves
 	 * like sizeof() when not directly referencing a flexible
 	 * array member, which means there will be many bounds checks
 	 * that will appear at run-time, without a way for them to be
@@ -613,7 +609,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 	 * is specifically the flexible array member).
 	 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832
 	 */
-	if (p_size_field != 0 && p_size_field != SIZE_MAX &&
+	if (p_size_field != SIZE_MAX &&
 	    p_size != p_size_field && p_size_field < size)
 		return true;
 
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index f9cc467334ce..f0c64b9e9b46 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -910,10 +910,9 @@ static void fortify_test_##memfunc(struct kunit *test)	\
 	memfunc(zero.buf, srcB, 0 + unconst);			\
 	KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);	\
 	KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);	\
-	/* We currently explicitly ignore zero-sized dests. */	\
 	memfunc(zero.buf, srcB, 1 + unconst);			\
 	KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0);	\
-	KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0);	\
+	KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1);	\
 }								\
 __fortify_test(memcpy)
 __fortify_test(memmove)
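To make the __counted_by case concrete, here is a sketch of the pattern
this now catches (the struct and function names are hypothetical, not
from the patch):

	struct msg {
		int len;
		u8 data[] __counted_by(len);
	};

	static void msg_fill(struct msg *m, const u8 *src, size_t n)
	{
		/*
		 * When m->len == 0, the destination is genuinely 0 sized;
		 * a copy with n > 0 is now reported as a write overflow
		 * instead of being skipped by the old p_size_field == 0
		 * special case.
		 */
		memcpy(m->data, src, n);
	}
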
From ef40d28f17bd384d7e0b630c7d83f108a526351b Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Wed, 19 Jun 2024 14:47:15 -0700
Subject: [PATCH 4/7] randomize_kstack: Remove non-functional per-arch entropy filtering

An unintended consequence of commit 9c573cd31343 ("randomize_kstack:
Improve entropy diffusion") was that the per-architecture entropy size
filtering reduced how many bits were being added to the mix, rather than
how many bits were being used during the offsetting. All architectures
fell back to the existing default of 0x3FF (10 bits), which will consume
at most 1KiB of stack space.

It seems that this is working just fine, so let's avoid the confusion
and update everything to use the default.

The prior intent of the per-architecture limits was:

  arm64:   capped at 0x1FF (9 bits), 5 bits effective
  powerpc: uncapped (10 bits), 6 or 7 bits effective
  riscv:   uncapped (10 bits), 6 bits effective
  x86:     capped at 0xFF (8 bits), 5 (x86_64) or 6 (ia32) bits effective
  s390:    capped at 0xFF (8 bits), undocumented effective entropy

Current discussion has led to just dropping the original per-architecture
filters. The additional entropy appears to be safe for arm64, x86, and
s390. Quoting Arnd, "There is no point pretending that 15.75KB is somehow
safe to use while 15.00KB is not."

Co-developed-by: Yuntao Liu
Signed-off-by: Yuntao Liu
Fixes: 9c573cd31343 ("randomize_kstack: Improve entropy diffusion")
Link: https://lore.kernel.org/r/20240617133721.377540-1-liuyuntao12@huawei.com
Reviewed-by: Arnd Bergmann
Acked-by: Mark Rutland
Acked-by: Heiko Carstens # s390
Link: https://lore.kernel.org/r/20240619214711.work.953-kees@kernel.org
Signed-off-by: Kees Cook
---
 arch/arm64/kernel/syscall.c          | 16 +++++++---------
 arch/s390/include/asm/entry-common.h |  2 +-
 arch/x86/include/asm/entry-common.h  | 15 ++++++---------
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad198262b981..7230f6e20ab8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 	syscall_set_return_value(current, regs, 0, ret);
 
 	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * but not enough for arm64 stack utilization comfort. To keep
-	 * reasonable stack head room, reduce the maximum offset to 9 bits.
+	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+	 * bits. The actual entropy will be further reduced by the compiler
+	 * when applying stack alignment constraints: the AAPCS mandates a
+	 * 16-byte aligned SP at function boundaries, which will remove the
+	 * 4 low bits from any entropy chosen here.
 	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints: the AAPCS mandates a
-	 * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
-	 *
-	 * The resulting 5 bits of entropy is seen in SP[8:4].
+	 * The resulting 6 bits of entropy is seen in SP[9:4].
 	 */
-	choose_random_kstack_offset(get_random_u16() & 0x1FF);
+	choose_random_kstack_offset(get_random_u16());
 }
 
 static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 7f5004065e8a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void)
 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 						  unsigned long ti_work)
 {
-	choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+	choose_random_kstack_offset(get_tod_clock_fast());
 }
 
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..fb2809b20b0a 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 #endif
 
 	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * but not enough for x86 stack utilization comfort. To keep
-	 * reasonable stack head room, reduce the maximum offset to 8 bits.
-	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints (see cc_stack_align4/8 in
+	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+	 * bits. The actual entropy will be further reduced by the compiler
+	 * when applying stack alignment constraints (see cc_stack_align4/8 in
 	 * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
 	 * low bits from any entropy chosen here.
 	 *
-	 * Therefore, final stack offset entropy will be 5 (x86_64) or
-	 * 6 (ia32) bits.
+	 * Therefore, final stack offset entropy will be 7 (x86_64) or
+	 * 8 (ia32) bits.
 	 */
-	choose_random_kstack_offset(rdtsc() & 0xFF);
+	choose_random_kstack_offset(rdtsc());
 }
 
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
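As a worked example of the arm64 result, using only values from the
updated comment above (illustrative, not additional kernel code):

	u32 offset = get_random_u16();	/* raw entropy, up to 16 bits */
	offset &= 0x3FF;		/* KSTACK_OFFSET_MAX(): capped to 10 bits */
	/*
	 * AAPCS 16-byte SP alignment then clears bits [3:0], leaving
	 * 6 bits of effective entropy in SP[9:4]. The old "& 0x1FF"
	 * discarded a bit before the cap was even applied.
	 */
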
From 18c18b1ff6c648ea62571554dfd698110757f894 Mon Sep 17 00:00:00 2001
From: Thorsten Blum
Date: Sun, 30 Jun 2024 01:36:09 +0200
Subject: [PATCH 5/7] gcc-plugins: Remove duplicate included header file stringpool.h

The header file stringpool.h is included once for GCC version >= 8 and
then again unconditionally for all versions. Since stringpool.h was
added in GCC 4.9 and the kernel currently requires GCC 5.1 as a minimum,
remove the conditional include. Including the header file only once
removes the following warning reported by make includecheck:

  stringpool.h is included more than once

However, it's important to include stringpool.h before attribs.h,
because attribs.h uses some of its functions. Compile-tested with
GCC 14.

Signed-off-by: Thorsten Blum
Link: https://lore.kernel.org/r/20240629233608.278028-2-thorsten.blum@toblux.com
Signed-off-by: Kees Cook
---
 scripts/gcc-plugins/gcc-common.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 1ae39b9f4a95..3222c1070444 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -62,11 +62,7 @@
 #include "pass_manager.h"
 #include "predict.h"
 #include "ipa-utils.h"
-
-#if BUILDING_GCC_VERSION >= 8000
 #include "stringpool.h"
-#endif
-
 #include "attribs.h"
 #include "varasm.h"
 #include "stor-layout.h"
@@ -78,7 +74,6 @@
 #include "context.h"
 #include "tree-ssa-alias.h"
 #include "tree-ssa.h"
-#include "stringpool.h"
 #if BUILDING_GCC_VERSION >= 7000
 #include "tree-vrp.h"
 #endif
From 3ccea4784fddd96fbd6c4497eb28b45dab638c2a Mon Sep 17 00:00:00 2001
From: Yanjun Yang
Date: Tue, 11 Jun 2024 18:09:47 +0800
Subject: [PATCH 6/7] ARM: Remove address checking for MMUless devices

Commit 169f9102f9198b ("ARM: 9350/1: fault: Implement
copy_from_kernel_nofault_allowed()") added this function to check
addresses before use. However, for devices without an MMU, the
addr >= TASK_SIZE check will always fail. This patch moves the
function inside the #ifdef CONFIG_MMU block.

Signed-off-by: Yanjun Yang
Acked-by: Ard Biesheuvel
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218953
Fixes: 169f9102f9198b ("ARM: 9350/1: fault: Implement copy_from_kernel_nofault_allowed()")
Link: https://lore.kernel.org/r/20240611100947.32241-1-yangyj.ee@gmail.com
Signed-off-by: Kees Cook
---
 arch/arm/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 67c425341a95..ab01b51de559 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -25,6 +25,8 @@
 
 #include "fault.h"
 
+#ifdef CONFIG_MMU
+
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 {
 	unsigned long addr = (unsigned long)unsafe_src;
@@ -32,8 +34,6 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 	return addr >= TASK_SIZE && ULONG_MAX - addr >= size;
 }
 
-#ifdef CONFIG_MMU
-
 /*
  * This is useful to dump out the page tables associated with
  * 'addr' in mm 'mm'.
From 872bb37f6829d4f7f3ed5afe2786add3d4384b4b Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Tue, 2 Jul 2024 14:16:16 -0700
Subject: [PATCH 7/7] randomize_kstack: Improve stack alignment codegen

The codegen for adding architecture-specific stack alignment to the
effective alloca() usage is somewhat inefficient and allows a bit to get
carried beyond the desired entropy range. This isn't really a problem,
but it's unexpected and the codegen is kind of bad.

Quoting Mark[1], the disassembly for arm64's invoke_syscall() looks like:

	// offset = raw_cpu_read(kstack_offset)
	mov	x4, sp
	adrp	x0, kstack_offset
	mrs	x5, tpidr_el1
	add	x0, x0, #:lo12:kstack_offset
	ldr	w0, [x0, x5]
	// offset = KSTACK_OFFSET_MAX(offset)
	and	x0, x0, #0x3ff
	// alloca(offset)
	add	x0, x0, #0xf
	and	x0, x0, #0x7f0
	sub	sp, x4, x0

... which in C would be:

	offset = raw_cpu_read(kstack_offset)
	offset &= 0x3ff;		// [0x0, 0x3ff]

	offset += 0xf;			// [0xf, 0x40e]
	offset &= 0x7f0;		// [0x0, 0x400]

... so when *all* bits [3:0] are 0, they'll have no impact, and when
*any* of bits [3:0] are 1 they'll trigger a carry into bit 4, which
could ripple all the way up and spill into bit 10.
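A concrete instance of that carry, with illustrative values plugged into
the C above:

	offset = 0x3ff;			// maximum after the 10-bit mask
	offset += 0xf;			// 0x40e
	offset &= 0x7f0;		// 0x400: bit 10 is set, one step
					// past the intended [0x0, 0x3ff]
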
Switch the masking in KSTACK_OFFSET_MAX() to explicitly clear the bottom
bits to avoid the rounding, by using 0b1111110000 instead of 0b1111111111:

	// offset = raw_cpu_read(kstack_offset)
	mov	x4, sp
	adrp	x0, 0
	mrs	x5, tpidr_el1
	add	x0, x0, #:lo12:kstack_offset
	ldr	w0, [x0, x5]
	// offset = KSTACK_OFFSET_MAX(offset)
	and	x0, x0, #0x3f0
	// alloca(offset)
	sub	sp, x4, x0

Suggested-by: Mark Rutland
Link: https://lore.kernel.org/lkml/ZnVfOnIuFl2kNWkT@J2N7QTR9R3/ [1]
Link: https://lore.kernel.org/r/20240702211612.work.576-kees@kernel.org
Signed-off-by: Kees Cook
---
 include/linux/randomize_kstack.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 6d92b68efbf6..1d982dbdd0d0 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -32,13 +32,19 @@ DECLARE_PER_CPU(u32, kstack_offset);
 #endif
 
 /*
- * Use, at most, 10 bits of entropy. We explicitly cap this to keep the
- * "VLA" from being unbounded (see above). 10 bits leaves enough room for
- * per-arch offset masks to reduce entropy (by removing higher bits, since
- * high entropy may overly constrain usable stack space), and for
- * compiler/arch-specific stack alignment to remove the lower bits.
+ * Use, at most, 6 bits of entropy (on 64-bit; 8 on 32-bit). This cap is
+ * to keep the "VLA" from being unbounded (see above). Additionally clear
+ * the bottom 4 bits (on 64-bit systems, 2 for 32-bit), since stack
+ * alignment will always be at least word size. This makes the compiler
+ * code gen better when it is applying the actual per-arch alignment to
+ * the final offset. The resulting randomness is reasonable without overly
+ * constraining usable stack space.
  */
-#define KSTACK_OFFSET_MAX(x)	((x) & 0x3FF)
+#ifdef CONFIG_64BIT
+#define KSTACK_OFFSET_MAX(x)	((x) & 0b1111110000)
+#else
+#define KSTACK_OFFSET_MAX(x)	((x) & 0b1111111100)
+#endif
 
 /**
  * add_random_kstack_offset - Increase stack utilization by previously
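To sanity-check the two masking schemes, here is a standalone userspace
sketch (not kernel code; illustrative only) that exhaustively compares
the offset ranges on 64-bit:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t old_max = 0, new_max = 0;

		for (uint32_t x = 0; x <= 0x3ff; x++) {
			/* old: 10-bit cap, then the compiler's 16-byte rounding */
			uint32_t old = ((x & 0x3ff) + 0xf) & 0x7f0;
			/* new: pre-aligned 6-bit cap, no rounding needed */
			uint32_t new = x & 0x3f0;

			if (old > old_max)
				old_max = old;
			if (new > new_max)
				new_max = new;
		}
		/* prints: old max 0x400, new max 0x3f0 */
		printf("old max 0x%x, new max 0x%x\n", old_max, new_max);
		return 0;
	}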