From b29a62d87cc0af3e9d134e9e0863b2cb053070b8 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 7 Jul 2024 15:05:19 -0400 Subject: [PATCH 001/103] mul_u64_u64_div_u64: make it precise always MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mul_u64_u64_div_u64: new implementation", v3. This provides an implementation for mul_u64_u64_div_u64() that always produces exact results. This patch (of 2): Library facilities must always return exact results. If the caller may be contented with approximations then it should do the approximation on its own. In this particular case the comment in the code says "the algorithm ... below might lose some precision". Well, if you try it with e.g.: a = 18446462598732840960 b = 18446462598732840960 c = 18446462598732840961 then the produced answer is 0 whereas the exact answer should be 18446462598732840959. This is _some_ precision lost indeed! Let's reimplement this function so it always produces the exact result regardless of its inputs while preserving existing fast paths when possible. Uwe said: : My personal interest is to get the calculations in pwm drivers right. : This function is used in several drivers below drivers/pwm/ . With the : errors in mul_u64_u64_div_u64(), pwm consumers might not get the : settings they request. Although I have to admit that I'm not aware it : breaks real use cases (because typically the periods used are too short : to make the involved multiplications overflow), but I pretty sure am : not aware of all usages and it breaks testing. : : Another justification is commits like : https://git.kernel.org/tip/77baa5bafcbe1b2a15ef9c37232c21279c95481c, : where people start to work around the precision shortcomings of : mul_u64_u64_div_u64(). 
Link: https://lkml.kernel.org/r/20240707190648.1982714-1-nico@fluxnic.net Link: https://lkml.kernel.org/r/20240707190648.1982714-2-nico@fluxnic.net Signed-off-by: Nicolas Pitre Tested-by: Uwe Kleine-König Reviewed-by: Uwe Kleine-König Tested-by: Biju Das Signed-off-by: Andrew Morton --- lib/math/div64.c | 110 ++++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/lib/math/div64.c b/lib/math/div64.c index 191761b1b623..b7fc75246399 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -186,55 +186,77 @@ EXPORT_SYMBOL(iter_div_u64_rem); #ifndef mul_u64_u64_div_u64 u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) { - u64 res = 0, div, rem; - int shift; + if (ilog2(a) + ilog2(b) <= 62) + return div64_u64(a * b, c); - /* can a * b overflow ? */ - if (ilog2(a) + ilog2(b) > 62) { +#if defined(__SIZEOF_INT128__) + + /* native 64x64=128 bits multiplication */ + u128 prod = (u128)a * b; + u64 n_lo = prod, n_hi = prod >> 64; + +#else + + /* perform a 64x64=128 bits multiplication manually */ + u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32; + u64 x, y, z; + + x = (u64)a_lo * b_lo; + y = (u64)a_lo * b_hi + (u32)(x >> 32); + z = (u64)a_hi * b_hi + (u32)(y >> 32); + y = (u64)a_hi * b_lo + (u32)y; + z += (u32)(y >> 32); + x = (y << 32) + (u32)x; + + u64 n_lo = x, n_hi = z; + +#endif + + int shift = __builtin_ctzll(c); + + /* try reducing the fraction in case the dividend becomes <= 64 bits */ + if ((n_hi >> shift) == 0) { + u64 n = (n_lo >> shift) | (n_hi << (64 - shift)); + + return div64_u64(n, c >> shift); /* - * Note that the algorithm after the if block below might lose - * some precision and the result is more exact for b > a. So - * exchange a and b if a is bigger than b. - * - * For example with a = 43980465100800, b = 100000000, c = 1000000000 - * the below calculation doesn't modify b at all because div == 0 - * and then shift becomes 45 + 26 - 62 = 9 and so the result - * becomes 4398035251080. 
However with a and b swapped the exact - * result is calculated (i.e. 4398046510080). + * The remainder value if needed would be: + * res = div64_u64_rem(n, c >> shift, &rem); + * rem = (rem << shift) + (n_lo - (n << shift)); */ - if (a > b) - swap(a, b); - - /* - * (b * a) / c is equal to - * - * (b / c) * a + - * (b % c) * a / c - * - * if nothing overflows. Can the 1st multiplication - * overflow? Yes, but we do not care: this can only - * happen if the end result can't fit in u64 anyway. - * - * So the code below does - * - * res = (b / c) * a; - * b = b % c; - */ - div = div64_u64_rem(b, c, &rem); - res = div * a; - b = rem; - - shift = ilog2(a) + ilog2(b) - 62; - if (shift > 0) { - /* drop precision */ - b >>= shift; - c >>= shift; - if (!c) - return res; - } } - return res + div64_u64(a * b, c); + if (n_hi >= c) { + /* overflow: result is unrepresentable in a u64 */ + return -1; + } + + /* Do the full 128 by 64 bits division */ + + shift = __builtin_clzll(c); + c <<= shift; + + int p = 64 + shift; + u64 res = 0; + bool carry; + + do { + carry = n_hi >> 63; + shift = carry ? 1 : __builtin_clzll(n_hi); + if (p < shift) + break; + p -= shift; + n_hi <<= shift; + n_hi |= n_lo >> (64 - shift); + n_lo <<= shift; + if (carry || (n_hi >= c)) { + n_hi -= c; + res |= 1ULL << p; + } + } while (n_hi); + /* The remainder value if needed would be n_hi << p */ + + return res; } EXPORT_SYMBOL(mul_u64_u64_div_u64); #endif From 1635e62e75a7bbb1c6274f6b43911cedfe0da60a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 7 Jul 2024 15:05:20 -0400 Subject: [PATCH 002/103] mul_u64_u64_div_u64: basic sanity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify that edge cases produce proper results, and some more. 
[npitre@baylibre.com: avoid undefined shift value] Link: https://lkml.kernel.org/r/7rrs9pn1-n266-3013-9q6n-1osp8r8s0rrn@syhkavp.arg Link: https://lkml.kernel.org/r/20240707190648.1982714-3-nico@fluxnic.net Signed-off-by: Nicolas Pitre Reviewed-by: Uwe Kleine-König Cc: Biju Das Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 10 +++ lib/math/Makefile | 1 + lib/math/div64.c | 9 ++- lib/math/test_mul_u64_u64_div_u64.c | 99 +++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 lib/math/test_mul_u64_u64_div_u64.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a30c03a66172..bf0995d328b3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2280,6 +2280,16 @@ config TEST_DIV64 If unsure, say N. +config TEST_MULDIV64 + tristate "mul_u64_u64_div_u64() test" + depends on DEBUG_KERNEL || m + help + Enable this to turn on 'mul_u64_u64_div_u64()' function test. + This test is executed only once during system boot (so affects + only boot time), or at module load time. + + If unsure, say N. 
+ config TEST_IOV_ITER tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/math/Makefile b/lib/math/Makefile index 91fcdb0c9efe..981a26127e08 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_TEST_DIV64) += test_div64.o +obj-$(CONFIG_TEST_MULDIV64) += test_mul_u64_u64_div_u64.o obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o diff --git a/lib/math/div64.c b/lib/math/div64.c index b7fc75246399..5faa29208bdb 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -212,11 +212,18 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) #endif + /* make sure c is not zero, trigger exception otherwise */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdiv-by-zero" + if (unlikely(c == 0)) + return 1/0; +#pragma GCC diagnostic pop + int shift = __builtin_ctzll(c); /* try reducing the fraction in case the dividend becomes <= 64 bits */ if ((n_hi >> shift) == 0) { - u64 n = (n_lo >> shift) | (n_hi << (64 - shift)); + u64 n = shift ? 
(n_lo >> shift) | (n_hi << (64 - shift)) : n_lo; return div64_u64(n, c >> shift); /* diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c new file mode 100644 index 000000000000..58d058de4e73 --- /dev/null +++ b/lib/math/test_mul_u64_u64_div_u64.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 BayLibre SAS + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/math64.h> + +typedef struct { u64 a; u64 b; u64 c; u64 result; } test_params; + +static test_params test_values[] = { +/* this contains many edge values followed by a couple random values */ +{ 0xb, 0x7, 0x3, 0x19 }, +{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000 }, +{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001 }, +{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000 }, +{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000 }, +{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab }, +{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000 }, +{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff }, +{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851 }, +{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554 }, +{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3 }, +{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2 }, +{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007 }, +{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001 }, +{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002 }, +{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8 }, +{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 
0xccccccccccccccca }, +{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b }, +{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9 }, +{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe }, +{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18 }, +{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35 }, +{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f }, +{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961 }, +{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216 }, +}; + +/* + * The above table can be verified with the following shell script: + * + * #!/bin/sh + * sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4/p' \ + * lib/math/test_mul_u64_u64_div_u64.c | + * while read a b c r; do + * expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $c | bc ) + * given=$( printf "%X\n" $r ) + * if [ "$expected" = "$given" ]; then + * echo "$a * $b / $c = $r OK" + * else + * echo "$a * $b / $c = $r is wrong" >&2 + * echo "should be equivalent to 0x$expected" >&2 + * exit 1 + * fi + * done + */ + +static int __init test_init(void) +{ + int i; + + pr_info("Starting mul_u64_u64_div_u64() test\n"); + + for (i = 0; i < ARRAY_SIZE(test_values); i++) { + u64 a = test_values[i].a; + u64 b = test_values[i].b; + u64 c = test_values[i].c; + u64 expected_result = test_values[i].result; + u64 result = mul_u64_u64_div_u64(a, b, c); + + if (result != expected_result) { + pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, c); + pr_err("ERROR: expected result: %016llx\n", expected_result); + pr_err("ERROR: obtained result: %016llx\n", result); + } + } + + pr_info("Completed mul_u64_u64_div_u64() test\n"); + return 0; +} + +static void __exit test_exit(void) +{ +} + +module_init(test_init); +module_exit(test_exit); + 
+MODULE_AUTHOR("Nicolas Pitre"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("mul_u64_u64_div_u64() test module"); From 053a5e4cbba88625ac6b53dea6371006237c34ba Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Mon, 15 Jul 2024 07:18:56 -0700 Subject: [PATCH 003/103] lib: test_objpool: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_objpool.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240715-md-lib-test_objpool-v2-1-5a2b9369c37e@quicinc.com Signed-off-by: Jeff Johnson Reviewed-by: Matt Wu Cc: Masami Hiramatsu Signed-off-by: Andrew Morton --- lib/test_objpool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/test_objpool.c b/lib/test_objpool.c index bfdb81599832..5a3f6961a70f 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -687,4 +687,5 @@ static void __exit ot_mod_exit(void) module_init(ot_mod_init); module_exit(ot_mod_exit); -MODULE_LICENSE("GPL"); \ No newline at end of file +MODULE_DESCRIPTION("Test module for lockless object pool"); +MODULE_LICENSE("GPL"); From e24f4de8a72b50b67ea116b38152bb98360f81b3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 23 Jul 2024 00:37:26 +0200 Subject: [PATCH 004/103] kcov: don't instrument lib/find_bit.c This file produces large amounts of flaky coverage not useful for the KCOV's intended use case (guiding the fuzzing process). 
Link: https://lkml.kernel.org/r/20240722223726.194658-1-andrey.konovalov@linux.dev Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Cc: Aleksandr Nogikh Cc: Alexander Potapenko Cc: Marco Elver Cc: Rasmus Villemoes Cc: Yury Norov Signed-off-by: Andrew Morton --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile b/lib/Makefile index 322bb127b4dc..0fde1c360f32 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -14,6 +14,7 @@ KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n KCOV_INSTRUMENT_fault-inject.o := n +KCOV_INSTRUMENT_find_bit.o := n # string.o implements standard library functions like memset/memcpy etc. # Use -ffreestanding to ensure that the compiler does not try to "optimize" From acf02be3c72f12c31f916d3465c4c716b4729538 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 19 Jul 2024 12:38:52 +0200 Subject: [PATCH 005/103] kexec: use atomic_try_cmpxchg_acquire() in kexec_trylock() Use atomic_try_cmpxchg_acquire(*ptr, &old, new) instead of atomic_cmpxchg_acquire(*ptr, old, new) == old in kexec_trylock(). x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg. 
Link: https://lkml.kernel.org/r/20240719103937.53742-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Acked-by: Baoquan He Cc: Eric Biederman Signed-off-by: Andrew Morton --- kernel/kexec_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 2595defe8c0d..d35d9792402d 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -23,7 +23,8 @@ int kimage_is_destination_range(struct kimage *image, extern atomic_t __kexec_lock; static inline bool kexec_trylock(void) { - return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0; + int old = 0; + return atomic_try_cmpxchg_acquire(&__kexec_lock, &old, 1); } static inline void kexec_unlock(void) { From dfe6c5692fb525e5e90cefe306ee0dffae13d35f Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Fri, 19 Jul 2024 19:43:10 +0800 Subject: [PATCH 006/103] ocfs2: fix the la space leak when unmounting an ocfs2 volume This bug has existed since the initial OCFS2 code. The code logic in ocfs2_sync_local_to_main() is wrong, as it ignores the last contiguous free bits, which causes an OCFS2 volume to lose the last free clusters of LA window on each umount command. 
Link: https://lkml.kernel.org/r/20240719114310.14245-1-heming.zhao@suse.com Signed-off-by: Heming Zhao Reviewed-by: Su Yue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Heming Zhao Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton --- fs/ocfs2/localalloc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 5df34561c551..8ac42ea81a17 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -1002,6 +1002,25 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, start = bit_off + 1; } + /* clear the contiguous bits until the end boundary */ + if (count) { + blkno = la_start_blk + + ocfs2_clusters_to_blocks(osb->sb, + start - count); + + trace_ocfs2_sync_local_to_main_free( + count, start - count, + (unsigned long long)la_start_blk, + (unsigned long long)blkno); + + status = ocfs2_release_clusters(handle, + main_bm_inode, + main_bm_bh, blkno, + count); + if (status < 0) + mlog_errno(status); + } + bail: if (status) mlog_errno(status); From 22be8e6b1385d52e68b38599eadfa5d87966ab23 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:16 +0300 Subject: [PATCH 007/103] MAINTAINERS: add XZ Embedded maintainer Patch series "xz: Updates to license, filters, and compression options", v2. XZ Embedded, the upstream project, switched from public domain to the BSD Zero Clause License (0BSD). Now matching SPDX license identifiers can be added. Documentation was revised. Fix syntax errors in kernel-doc comments in <linux/xz.h>. The xz_dec API docs from <linux/xz.h> are now included in Documentation/staging/xz.rst. The new ARM64 and RISC-V filters can be used for kernel decompression if CONFIG_EFI_ZBOOT=y. The filters can be used by Squashfs too. (Userspace Squashfs-tools already had the ARM64 filter support committed but it was reverted due to backdoor fears. I try to get ARM64 and RISC-V filter support added to Squashfs-tools somewhat soon.) 
Account for the default threading change made in the xz command line tool version 5.6.0. Tweak kernel compression options for archs that support XZ compressed kernel. This patch (of 16): I have been the maintainer of the upstream project since I submitted the code to Linux in 2010 but I forgot to add myself to MAINTAINERS. Link: https://lkml.kernel.org/r/20240721133633.47721-1-lasse.collin@tukaani.org Link: https://lkml.kernel.org/r/20240721133633.47721-2-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fe83ba7194ea..8fab7a347fc0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25242,6 +25242,19 @@ S: Maintained F: drivers/spi/spi-xtensa-xtfpga.c F: sound/soc/xtensa/xtfpga-i2s.c +XZ EMBEDDED +M: Lasse Collin +S: Maintained +W: https://tukaani.org/xz/embedded.html +B: https://github.com/tukaani-project/xz-embedded/issues +C: irc://irc.libera.chat/tukaani +F: Documentation/staging/xz.rst +F: include/linux/decompress/unxz.h +F: include/linux/xz.h +F: lib/decompress_unxz.c +F: lib/xz/ +F: scripts/xz_wrap.sh + YAM DRIVER FOR AX.25 M: Jean-Paul Roubelat L: linux-hams@vger.kernel.org From c1ccbbaa76c989aeaecaefee1a3d890cc674b005 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:17 +0300 Subject: [PATCH 008/103] LICENSES: add 0BSD license text The license text was copied from: https://spdx.org/licenses/0BSD.html Link: https://lkml.kernel.org/r/20240721133633.47721-3-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Thomas Gleixner 
Cc: Greg Kroah-Hartman Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Will Deacon Signed-off-by: Andrew Morton --- LICENSES/deprecated/0BSD | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 LICENSES/deprecated/0BSD diff --git a/LICENSES/deprecated/0BSD b/LICENSES/deprecated/0BSD new file mode 100644 index 000000000000..e4b95b749966 --- /dev/null +++ b/LICENSES/deprecated/0BSD @@ -0,0 +1,23 @@ +Valid-License-Identifier: 0BSD +SPDX-URL: https://spdx.org/licenses/0BSD.html +Usage-Guide: + To use the BSD Zero Clause License put the following SPDX tag/value + pair into a comment according to the placement guidelines in the + licensing rules documentation: + SPDX-License-Identifier: 0BSD +License-Text: + +BSD Zero Clause License + +Copyright (c) + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. From 836d13a6ef8a2eb0eab2bd2de06f2deabc62b060 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:18 +0300 Subject: [PATCH 009/103] xz: switch from public domain to BSD Zero Clause License (0BSD) Remove the public domain notices and add SPDX license identifiers. 
Change MODULE_LICENSE from "GPL" to "Dual BSD/GPL" because 0BSD should count as a BSD license variant here. The switch to 0BSD was done in the upstream XZ Embedded project because public domain has (real or perceived) legal issues in some jurisdictions. Link: https://lkml.kernel.org/r/20240721133633.47721-4-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/decompress/unxz.h | 5 ++--- include/linux/xz.h | 5 ++--- lib/decompress_unxz.c | 5 ++--- lib/xz/xz_crc32.c | 5 ++--- lib/xz/xz_dec_bcj.c | 5 ++--- lib/xz/xz_dec_lzma2.c | 5 ++--- lib/xz/xz_dec_stream.c | 5 ++--- lib/xz/xz_dec_syms.c | 12 +++--------- lib/xz/xz_dec_test.c | 12 +++--------- lib/xz/xz_lzma2.h | 5 ++--- lib/xz/xz_private.h | 5 ++--- lib/xz/xz_stream.h | 5 ++--- scripts/xz_wrap.sh | 5 +---- 13 files changed, 27 insertions(+), 52 deletions(-) diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h index f764e2a7201e..3dd2658a9dab 100644 --- a/include/linux/decompress/unxz.h +++ b/include/linux/decompress/unxz.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. 
*/ #ifndef DECOMPRESS_UNXZ_H diff --git a/include/linux/xz.h b/include/linux/xz.h index 7285ca5d56e9..5728d57aecc0 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * XZ decompressor * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_H diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 842894158944..34bb7efc0412 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ /* diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 88a2c35e1b59..30b8a27110b1 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: 0BSD + /* * CRC32 using the polynomial from IEEE-802.3 * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ /* diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index ef449e97d1a1..ab9237ed6db8 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: 0BSD + /* * Branch/Call/Jump (BCJ) filter decoders * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include "xz_private.h" diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 27ce34520e78..613939f5dd6c 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: 0BSD + /* * LZMA2 decoder * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. 
- * You can do whatever you want with this file. */ #include "xz_private.h" diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c index 683570b93a8c..0058406ccd17 100644 --- a/lib/xz/xz_dec_stream.c +++ b/lib/xz/xz_dec_stream.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * .xz Stream decoder * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include "xz_private.h" diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c index 61098c67a413..495d2cc2e6e8 100644 --- a/lib/xz/xz_dec_syms.c +++ b/lib/xz/xz_dec_syms.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * XZ decoder module information * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include @@ -25,9 +24,4 @@ EXPORT_SYMBOL(xz_dec_microlzma_end); MODULE_DESCRIPTION("XZ decompressor"); MODULE_VERSION("1.1"); MODULE_AUTHOR("Lasse Collin and Igor Pavlov"); - -/* - * This code is in the public domain, but in Linux it's simplest to just - * say it's GPL and consider the authors as the copyright holders. - */ -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/xz/xz_dec_test.c b/lib/xz/xz_dec_test.c index da28a19d6c98..53d3600f2ddb 100644 --- a/lib/xz/xz_dec_test.c +++ b/lib/xz/xz_dec_test.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * XZ decoder tester * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include @@ -212,9 +211,4 @@ module_exit(xz_dec_test_exit); MODULE_DESCRIPTION("XZ decompressor tester"); MODULE_VERSION("1.0"); MODULE_AUTHOR("Lasse Collin "); - -/* - * This code is in the public domain, but in Linux it's simplest to just - * say it's GPL and consider the authors as the copyright holders. 
- */ -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h index 92d852d4f87a..d2632b7dfb9c 100644 --- a/lib/xz/xz_lzma2.h +++ b/lib/xz/xz_lzma2.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * LZMA2 definitions * * Authors: Lasse Collin * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_LZMA2_H diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index bf1e94ec7873..2412a5d54801 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Private includes and definitions * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_PRIVATE_H diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h index 430bb3a0d195..55f9f6f94b78 100644 --- a/lib/xz/xz_stream.h +++ b/lib/xz/xz_stream.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Definitions for handling the .xz file format * * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_STREAM_H diff --git a/scripts/xz_wrap.sh b/scripts/xz_wrap.sh index d06baf626abe..bb760b721b2c 100755 --- a/scripts/xz_wrap.sh +++ b/scripts/xz_wrap.sh @@ -1,13 +1,10 @@ #!/bin/sh +# SPDX-License-Identifier: 0BSD # # This is a wrapper for xz to compress the kernel image using appropriate # compression options depending on the architecture. # # Author: Lasse Collin -# -# This file has been put into the public domain. -# You can do whatever you want with this file. -# BCJ= LZMA2OPTS= From ff221153aafa08159f3dcc187c6f3a7a837e1c3d Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:19 +0300 Subject: [PATCH 010/103] xz: fix comments and coding style - Fix comments that were no longer in sync with the code below them. - Fix language errors. 
- Fix coding style. Link: https://lkml.kernel.org/r/20240721133633.47721-5-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/decompress_unxz.c | 20 ++++++++++---------- lib/xz/Kconfig | 3 ++- scripts/Makefile.lib | 13 ++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 34bb7efc0412..46aa3be13fc5 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -102,7 +102,7 @@ #ifdef STATIC # define XZ_PREBOOT #else -#include <linux/decompress/unxz.h> +# include <linux/decompress/unxz.h> #endif #ifdef __KERNEL__ # include <linux/decompress/mm.h> @@ -219,7 +219,7 @@ void *memmove(void *dest, const void *src, size_t size) #endif /* - * Since we need memmove anyway, would use it as memcpy too. + * Since we need memmove anyway, we could use it as memcpy too. * Commented out for now to avoid breaking things. */ /* @@ -389,17 +389,17 @@ error_alloc_state: } /* - * This macro is used by architecture-specific files to decompress + * This function is used by architecture-specific files to decompress * the kernel image. 
*/ #ifdef XZ_PREBOOT -STATIC int INIT __decompress(unsigned char *buf, long len, - long (*fill)(void*, unsigned long), - long (*flush)(void*, unsigned long), - unsigned char *out_buf, long olen, - long *pos, - void (*error)(char *x)) +STATIC int INIT __decompress(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long out_size, + long *in_used, + void (*error)(char *x)) { - return unxz(buf, len, fill, flush, out_buf, pos, error); + return unxz(in, in_size, fill, flush, out, in_used, error); } #endif diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index aef086a6bf2f..6b80453d8f54 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -5,7 +5,8 @@ config XZ_DEC help LZMA2 compression algorithm and BCJ filters are supported using the .xz file format as the container. For integrity checking, - CRC32 is supported. See Documentation/staging/xz.rst for more information. + CRC32 is supported. See Documentation/staging/xz.rst for more + information. if XZ_DEC diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 207325eaf1d1..dae2089e7bc6 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -530,14 +530,17 @@ quiet_cmd_fit = FIT $@ # XZ # --------------------------------------------------------------------------- -# Use xzkern to compress the kernel image and xzmisc to compress other things. +# Use xzkern or xzkern_with_size to compress the kernel image and xzmisc to +# compress other things. # # xzkern uses a big LZMA2 dictionary since it doesn't increase memory usage # of the kernel decompressor. A BCJ filter is used if it is available for -# the target architecture. xzkern also appends uncompressed size of the data -# using size_append. The .xz format has the size information available at -# the end of the file too, but it's in more complex format and it's good to -# avoid changing the part of the boot code that reads the uncompressed size. 
+# the target architecture. +# +# xzkern_with_size also appends uncompressed size of the data using +# size_append. The .xz format has the size information available at the end +# of the file too, but it's in more complex format and it's good to avoid +# changing the part of the boot code that reads the uncompressed size. # Note that the bytes added by size_append will make the xz tool think that # the file is corrupt. This is expected. # From ad8c67b870d108aa1286f4fc76d0c29a736fd75e Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:20 +0300 Subject: [PATCH 011/103] xz: fix kernel-doc formatting errors in xz.h The opaque structs xz_dec and xz_dec_microlzma are declared in xz.h but their definitions are in xz_dec_lzma2.c without kernel-doc comments. Use regular comments for these structs in xz.h to avoid errors when building the docs. Add a few missing colons. Link: https://lkml.kernel.org/r/20240721133633.47721-6-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/xz.h b/include/linux/xz.h index 5728d57aecc0..af1e075d9add 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -142,7 +142,7 @@ struct xz_buf { size_t out_size; }; -/** +/* * struct xz_dec - Opaque type to hold the XZ decoder state */ struct xz_dec; @@ -240,15 +240,16 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * marked with XZ_EXTERN. This avoids warnings about static functions that * are never defined. 
*/ -/** + +/* * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state */ struct xz_dec_microlzma; /** * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder - * @mode XZ_SINGLE or XZ_PREALLOC - * @dict_size LZMA dictionary size. This must be at least 4 KiB and + * @mode: XZ_SINGLE or XZ_PREALLOC + * @dict_size: LZMA dictionary size. This must be at least 4 KiB and * at most 3 GiB. * * In contrast to xz_dec_init(), this function only allocates the memory @@ -276,15 +277,15 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state - * @s Decoder state allocated using xz_dec_microlzma_alloc() - * @comp_size Compressed size of the input stream - * @uncomp_size Uncompressed size of the input stream. A value smaller + * @s: Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size: Compressed size of the input stream + * @uncomp_size: Uncompressed size of the input stream. A value smaller * than the real uncompressed size of the input stream can * be specified if uncomp_size_is_exact is set to false. * uncomp_size can never be set to a value larger than the * expected real uncompressed size because it would eventually * result in XZ_DATA_ERROR. - * @uncomp_size_is_exact This is an int instead of bool to avoid + * @uncomp_size_is_exact: This is an int instead of bool to avoid * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. */ @@ -294,7 +295,7 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder - * @s Decoder state initialized using xz_dec_microlzma_reset() + * @s: Decoder state initialized using xz_dec_microlzma_reset() * @b: Input and output buffers * * This works similarly to xz_dec_run() with a few important differences. 
From 0f2c5996340b69d167d3f6ca38d3012204787ac1 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:21 +0300 Subject: [PATCH 012/103] xz: improve the MicroLZMA kernel-doc in xz.h Move the description of the format into a "DOC:" comment. Emphasize that MicroLZMA functions aren't usually needed. Link: https://lkml.kernel.org/r/20240721133633.47721-7-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/xz.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/xz.h b/include/linux/xz.h index af1e075d9add..701d62c02b9a 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -232,9 +232,18 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s); */ XZ_EXTERN void xz_dec_end(struct xz_dec *s); -/* - * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. - * See xz_dec_microlzma_alloc() below for details. +/** + * DOC: MicroLZMA decompressor + * + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. **In most cases one needs the XZ APIs above instead.** + * + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. 
* * These functions aren't used or available in preboot code and thus aren't * marked with XZ_EXTERN. This avoids warnings about static functions that @@ -262,15 +271,6 @@ struct xz_dec_microlzma; * On success, xz_dec_microlzma_alloc() returns a pointer to * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. - * - * The compressed format supported by this decoder is a raw LZMA stream - * whose first byte (always 0x00) has been replaced with bitwise-negation - * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is - * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. - * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream - * marker must not be used. The unused values are reserved for future use. - * This MicroLZMA header format was created for use in EROFS but may be used - * by others too. */ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, uint32_t dict_size); From 64167246791eb38af4cbe8bc93fc2701c71fd17e Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:22 +0300 Subject: [PATCH 013/103] xz: Documentation/staging/xz.rst: Revise thoroughly Add SPDX license identifier. Omit xz_dec_test info. That isn't relevant to developers of non-XZ code. Revise the docs about xzkern and add xzkern_with_size. The latter was added to scripts/Makefile.lib in the commit 7ce7e984ab2b ("kbuild: rename cmd_{bzip2,lzma,lzo,lz4,xzkern,zstd22}"). Omit contact info as MAINTAINERS has it. Omit other info that is outdated or not relevant in the kernel context. Include the xz_dec kernel-doc from include/linux/xz.h. 
Link: https://lkml.kernel.org/r/20240721133633.47721-8-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Cc: Rui Li Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- Documentation/staging/xz.rst | 142 ++++++++++++++--------------------- 1 file changed, 58 insertions(+), 84 deletions(-) diff --git a/Documentation/staging/xz.rst b/Documentation/staging/xz.rst index b2f5ff12a161..e1054e9a8e65 100644 --- a/Documentation/staging/xz.rst +++ b/Documentation/staging/xz.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: 0BSD + ============================ XZ data compression in Linux ============================ @@ -6,62 +8,55 @@ Introduction ============ XZ is a general purpose data compression format with high compression -ratio and relatively fast decompression. The primary compression -algorithm (filter) is LZMA2. Additional filters can be used to improve -compression ratio even further. E.g. Branch/Call/Jump (BCJ) filters -improve compression ratio of executable data. +ratio. The XZ decompressor in Linux is called XZ Embedded. It supports +the LZMA2 filter and optionally also Branch/Call/Jump (BCJ) filters +for executable code. CRC32 is supported for integrity checking. -The XZ decompressor in Linux is called XZ Embedded. It supports -the LZMA2 filter and optionally also BCJ filters. CRC32 is supported -for integrity checking. The home page of XZ Embedded is at -<https://tukaani.org/xz/embedded.html>, where you can find the -latest version and also information about using the code outside -the Linux kernel.
+See the `XZ Embedded`_ home page for the latest version which includes +a few optional extra features that aren't required in the Linux kernel +and information about using the code outside the Linux kernel. -For userspace, XZ Utils provide a zlib-like compression library -and a gzip-like command line tool. XZ Utils can be downloaded from -<https://tukaani.org/xz/>. +For userspace, `XZ Utils`_ provide a zlib-like compression library +and a gzip-like command line tool. + +.. _XZ Embedded: https://tukaani.org/xz/embedded.html +.. _XZ Utils: https://tukaani.org/xz/ XZ related components in the kernel =================================== The xz_dec module provides XZ decompressor with single-call (buffer -to buffer) and multi-call (stateful) APIs. The usage of the xz_dec -module is documented in include/linux/xz.h. - -The xz_dec_test module is for testing xz_dec. xz_dec_test is not -useful unless you are hacking the XZ decompressor. xz_dec_test -allocates a char device major dynamically to which one can write -.xz files from userspace. The decompressed output is thrown away. -Keep an eye on dmesg to see diagnostics printed by xz_dec_test. -See the xz_dec_test source code for the details. +to buffer) and multi-call (stateful) APIs in include/linux/xz.h. For decompressing the kernel image, initramfs, and initrd, there is a wrapper function in lib/decompress_unxz.c. Its API is the same as in other decompress_*.c files, which is defined in include/linux/decompress/generic.h. -scripts/xz_wrap.sh is a wrapper for the xz command line tool found -from XZ Utils. The wrapper sets compression options to values suitable -for compressing the kernel image. +For kernel makefiles, three commands are provided for use with +``$(call if_changed)``. They require the xz tool from XZ Utils. -For kernel makefiles, two commands are provided for use with -$(call if_needed). The kernel image should be compressed with -$(call if_needed,xzkern) which will use a BCJ filter and a big LZMA2 -dictionary.
It will also append a four-byte trailer containing the -uncompressed size of the file, which is needed by the boot code. -Other things should be compressed with $(call if_needed,xzmisc) -which will use no BCJ filter and 1 MiB LZMA2 dictionary. +- ``$(call if_changed,xzkern)`` is for compressing the kernel image. + It runs the script scripts/xz_wrap.sh which uses arch-optimized + options and a big LZMA2 dictionary. + +- ``$(call if_changed,xzkern_with_size)`` is like ``xzkern`` above but + this also appends a four-byte trailer containing the uncompressed size + of the file. The trailer is needed by the boot code on some archs. + +- Other things can be compressed with ``$(call if_changed,xzmisc)`` + which will use no BCJ filter and 1 MiB LZMA2 dictionary. Notes on compression options ============================ -Since the XZ Embedded supports only streams with no integrity check or -CRC32, make sure that you don't use some other integrity check type -when encoding files that are supposed to be decoded by the kernel. With -liblzma, you need to use either LZMA_CHECK_NONE or LZMA_CHECK_CRC32 -when encoding. With the xz command line tool, use --check=none or ---check=crc32. +Since the XZ Embedded supports only streams with CRC32 or no integrity +check, make sure that you don't use some other integrity check type +when encoding files that are supposed to be decoded by the kernel. +With liblzma from XZ Utils, you need to use either ``LZMA_CHECK_CRC32`` +or ``LZMA_CHECK_NONE`` when encoding. With the ``xz`` command line tool, +use ``--check=crc32`` or ``--check=none`` to override the default +``--check=crc64``. Using CRC32 is strongly recommended unless there is some other layer which will verify the integrity of the uncompressed data anyway. @@ -71,57 +66,36 @@ by the decoder; you can only change the integrity check type (or disable it) for the actual uncompressed data. In userspace, LZMA2 is typically used with dictionary sizes of several -megabytes.
The decoder needs to have the dictionary in RAM, thus big -dictionaries cannot be used for files that are intended to be decoded -by the kernel. 1 MiB is probably the maximum reasonable dictionary -size for in-kernel use (maybe more is OK for initramfs). The presets -in XZ Utils may not be optimal when creating files for the kernel, -so don't hesitate to use custom settings. Example:: +megabytes. The decoder needs to have the dictionary in RAM: - xz --check=crc32 --lzma2=dict=512KiB inputfile +- In multi-call mode the dictionary is allocated as part of the + decoder state. The reasonable maximum dictionary size for in-kernel + use will depend on the target hardware: a few megabytes is fine for + desktop systems while 64 KiB to 1 MiB might be more appropriate on + some embedded systems. -An exception to above dictionary size limitation is when the decoder -is used in single-call mode. Decompressing the kernel itself is an -example of this situation. In single-call mode, the memory usage -doesn't depend on the dictionary size, and it is perfectly fine to -use a big dictionary: for maximum compression, the dictionary should -be at least as big as the uncompressed data itself. +- In single-call mode the output buffer is used as the dictionary + buffer. That is, the size of the dictionary doesn't affect the + decompressor memory usage at all. Only the base data structures + are allocated which take a little less than 30 KiB of memory. + For the best compression, the dictionary should be at least + as big as the uncompressed data. A notable example of single-call + mode is decompressing the kernel itself (except on PowerPC). -Future plans -============ +The compression presets in XZ Utils may not be optimal when creating +files for the kernel, so don't hesitate to use custom settings to, +for example, set the dictionary size. Also, xz may produce a smaller +file in single-threaded mode so setting that explicitly is recommended. 
+Example:: -Creating a limited XZ encoder may be considered if people think it is -useful. LZMA2 is slower to compress than e.g. Deflate or LZO even at -the fastest settings, so it isn't clear if LZMA2 encoder is wanted -into the kernel. + xz --threads=1 --check=crc32 --lzma2=dict=512KiB inputfile -Support for limited random-access reading is planned for the -decompression code. I don't know if it could have any use in the -kernel, but I know that it would be useful in some embedded projects -outside the Linux kernel. +xz_dec API +========== -Conformance to the .xz file format specification -================================================ +This is available with ``#include <linux/xz.h>``. -There are a couple of corner cases where things have been simplified -at expense of detecting errors as early as possible. These should not -matter in practice all, since they don't cause security issues. But -it is good to know this if testing the code e.g. with the test files -from XZ Utils. +``XZ_EXTERN`` is a macro used in the preboot code. Ignore it when +reading this documentation. -Reporting bugs -============== - -Before reporting a bug, please check that it's not fixed already -at upstream. See <https://tukaani.org/xz/embedded.html> to get the -latest code. - -Report bugs to <lasse.collin@tukaani.org> or visit #tukaani on -Freenode and talk to Larhzu. I don't actively read LKML or other -kernel-related mailing lists, so if there's something I should know, -you should email to me personally or use IRC. - -Don't bother Igor Pavlov with questions about the XZ implementation -in the kernel or about XZ Utils. While these two implementations -include essential code that is directly based on Igor Pavlov's code, -these implementations aren't maintained nor supported by him. +..
kernel-doc:: include/linux/xz.h From 2ee96abef214550d9e92f5143ee3ac1fd1323e67 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:24 +0300 Subject: [PATCH 014/103] xz: cleanup CRC32 edits from 2018 In 2018, a dependency on <linux/crc32poly.h> was added to avoid duplicating the same constant in multiple files. Two months later it was found to be a bad idea and the definition of CRC32_POLY_LE macro was moved into xz_private.h to avoid including <linux/crc32poly.h>. xz_private.h is a wrong place for it too. Revert back to the upstream version which has the poly in xz_crc32_init() in xz_crc32.c. Link: https://lkml.kernel.org/r/20240721133633.47721-10-lasse.collin@tukaani.org Fixes: faa16bc404d7 ("lib: Use existing define with polynomial") Fixes: 242cdad873a7 ("lib/xz: Put CRC32_POLY_LE in xz_private.h") Signed-off-by: Lasse Collin Reviewed-by: Sam James Tested-by: Michael Ellerman (powerpc) Cc: Krzysztof Kozlowski Cc: Herbert Xu Cc: Joel Stanley Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/xz/xz_crc32.c | 2 +- lib/xz/xz_private.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 30b8a27110b1..effdf34ec48d 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -28,7 +28,7 @@ STATIC_RW_DATA uint32_t xz_crc32_table[256]; XZ_EXTERN void xz_crc32_init(void) { - const uint32_t poly = CRC32_POLY_LE; + const uint32_t poly = 0xEDB88320; uint32_t i; uint32_t j; diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 2412a5d54801..811add814ae4 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -104,10 +104,6 @@ # endif #endif -#ifndef CRC32_POLY_LE -#define CRC32_POLY_LE 0xedb88320 -#endif - /* * Allocate memory for LZMA2 decoder.
xz_dec_lzma2_reset() must be used * before calling xz_dec_lzma2_run(). From bdfc0411717d52b9d2f00e48c452a61389814693 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:25 +0300 Subject: [PATCH 015/103] xz: optimize for-loop conditions in the BCJ decoders Compilers cannot optimize the addition "i + 4" away since theoretically it could overflow. Link: https://lkml.kernel.org/r/20240721133633.47721-11-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/xz/xz_dec_bcj.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index ab9237ed6db8..e0b4bf4999c0 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -161,7 +161,9 @@ static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t instr; - for (i = 0; i + 4 <= size; i += 4) { + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { instr = get_unaligned_be32(buf + i); if ((instr & 0xFC000003) == 0x48000001) { instr &= 0x03FFFFFC; @@ -218,7 +220,9 @@ static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) /* Instruction normalized with bit_res for easier manipulation */ uint64_t norm; - for (i = 0; i + 16 <= size; i += 16) { + size &= ~(size_t)15; + + for (i = 0; i < size; i += 16) { mask = branch_table[buf[i] & 0x1F]; for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { if (((mask >> slot) & 1) == 0) @@ -266,7 +270,9 @@ static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t addr; - for (i = 0; i + 4 <= size; i += 4) { + size &= 
~(size_t)3; + + for (i = 0; i < size; i += 4) { if (buf[i + 3] == 0xEB) { addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) | ((uint32_t)buf[i + 2] << 16); @@ -289,7 +295,12 @@ static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t addr; - for (i = 0; i + 4 <= size; i += 2) { + if (size < 4) + return 0; + + size -= 4; + + for (i = 0; i <= size; i += 2) { if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) { addr = (((uint32_t)buf[i + 1] & 0x07) << 19) @@ -317,7 +328,9 @@ static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t instr; - for (i = 0; i + 4 <= size; i += 4) { + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { instr = get_unaligned_be32(buf + i); if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { instr <<= 2; From 4b62813f5e7d44a33ebd74f03da041712c702bf0 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:26 +0300 Subject: [PATCH 016/103] xz: Add ARM64 BCJ filter Also omit a duplicated check for XZ_DEC_ARM in xz_private.h. A later commit updates lib/decompress_unxz.c to enable this filter for kernel decompression. lib/decompress_unxz.c is already used if CONFIG_EFI_ZBOOT=y && CONFIG_KERNEL_XZ=y. This filter can be used by Squashfs without modifications to the Squashfs kernel code (only needs support in userspace Squashfs-tools). 
Link: https://lkml.kernel.org/r/20240721133633.47721-12-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/xz/Kconfig | 5 +++++ lib/xz/xz_dec_bcj.c | 52 ++++++++++++++++++++++++++++++++++++++++++++- lib/xz/xz_private.h | 7 ++++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 6b80453d8f54..1166627a87dc 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -30,6 +30,11 @@ config XZ_DEC_ARMTHUMB default y select XZ_DEC_BCJ +config XZ_DEC_ARM64 + bool "ARM64 BCJ filter decoder" if EXPERT + default y + select XZ_DEC_BCJ + config XZ_DEC_SPARC bool "SPARC BCJ filter decoder" if EXPERT default y diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index e0b4bf4999c0..941198a8a55b 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -23,7 +23,8 @@ struct xz_dec_bcj { BCJ_IA64 = 6, /* Big or little endian */ BCJ_ARM = 7, /* Little endian only */ BCJ_ARMTHUMB = 8, /* Little endian only */ - BCJ_SPARC = 9 /* Big or little endian */ + BCJ_SPARC = 9, /* Big or little endian */ + BCJ_ARM64 = 10 /* AArch64 */ } type; /* @@ -346,6 +347,47 @@ static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) } #endif +#ifdef XZ_DEC_ARM64 +static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + uint32_t addr; + + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { + instr = get_unaligned_le32(buf + i); + + if ((instr >> 26) == 0x25) { + /* BL instruction */ + addr = instr - ((s->pos + (uint32_t)i) >> 2); + instr = 0x94000000 | (addr & 0x03FFFFFF); + put_unaligned_le32(instr, 
buf + i); + + } else if ((instr & 0x9F000000) == 0x90000000) { + /* ADRP instruction */ + addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC); + + /* Only convert values in the range +/-512 MiB. */ + if ((addr + 0x020000) & 0x1C0000) + continue; + + addr -= (s->pos + (uint32_t)i) >> 12; + + instr &= 0x9000001F; + instr |= (addr & 3) << 29; + instr |= (addr & 0x03FFFC) << 3; + instr |= (0U - (addr & 0x020000)) & 0xE00000; + + put_unaligned_le32(instr, buf + i); + } + } + + return i; +} +#endif + /* * Apply the selected BCJ filter. Update *pos and s->pos to match the amount * of data that got filtered. @@ -392,6 +434,11 @@ static void bcj_apply(struct xz_dec_bcj *s, case BCJ_SPARC: filtered = bcj_sparc(s, buf, size); break; +#endif +#ifdef XZ_DEC_ARM64 + case BCJ_ARM64: + filtered = bcj_arm64(s, buf, size); + break; #endif default: /* Never reached but silence compiler warnings. */ @@ -565,6 +612,9 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) #endif #ifdef XZ_DEC_SPARC case BCJ_SPARC: +#endif +#ifdef XZ_DEC_ARM64 + case BCJ_ARM64: #endif break; diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 811add814ae4..307e0de8c260 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -36,6 +36,9 @@ # ifdef CONFIG_XZ_DEC_SPARC # define XZ_DEC_SPARC # endif +# ifdef CONFIG_XZ_DEC_ARM64 +# define XZ_DEC_ARM64 +# endif # ifdef CONFIG_XZ_DEC_MICROLZMA # define XZ_DEC_MICROLZMA # endif @@ -97,9 +100,9 @@ */ #ifndef XZ_DEC_BCJ # if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ - || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ + || defined(XZ_DEC_IA64) \ || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ - || defined(XZ_DEC_SPARC) + || defined(XZ_DEC_SPARC) || defined(XZ_DEC_ARM64) # define XZ_DEC_BCJ # endif #endif From 93d09773d1a5339160e23906c68c42644e13e3d8 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:27 +0300 Subject: [PATCH 017/103] xz: add RISC-V BCJ filter A later commit updates 
lib/decompress_unxz.c to enable this filter for kernel decompression. lib/decompress_unxz.c is already used if CONFIG_EFI_ZBOOT=y && CONFIG_KERNEL_XZ=y. This filter can be used by Squashfs without modifications to the Squashfs kernel code (only needs support in userspace Squashfs-tools). Link: https://lkml.kernel.org/r/20240721133633.47721-13-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/xz/Kconfig | 5 +++ lib/xz/xz_dec_bcj.c | 104 ++++++++++++++++++++++++++++++++++++++++++- lib/xz/xz_dec_syms.c | 2 +- lib/xz/xz_private.h | 6 ++- 4 files changed, 114 insertions(+), 3 deletions(-) diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 1166627a87dc..20aa459bfb3e 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -40,6 +40,11 @@ config XZ_DEC_SPARC default y select XZ_DEC_BCJ +config XZ_DEC_RISCV + bool "RISC-V BCJ filter decoder" if EXPERT + default y + select XZ_DEC_BCJ + config XZ_DEC_MICROLZMA bool "MicroLZMA decoder" default n diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index 941198a8a55b..42d7f268726f 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -24,7 +24,8 @@ struct xz_dec_bcj { BCJ_ARM = 7, /* Little endian only */ BCJ_ARMTHUMB = 8, /* Little endian only */ BCJ_SPARC = 9, /* Big or little endian */ - BCJ_ARM64 = 10 /* AArch64 */ + BCJ_ARM64 = 10, /* AArch64 */ + BCJ_RISCV = 11 /* RV32GQC_Zfh, RV64GQC_Zfh */ } type; /* @@ -388,6 +389,99 @@ static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) } #endif +#ifdef XZ_DEC_RISCV +static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t b1; + 
uint32_t b2; + uint32_t b3; + uint32_t instr; + uint32_t instr2; + uint32_t instr2_rs1; + uint32_t addr; + + if (size < 8) + return 0; + + size -= 8; + + for (i = 0; i <= size; i += 2) { + instr = buf[i]; + + if (instr == 0xEF) { + /* JAL */ + b1 = buf[i + 1]; + if ((b1 & 0x0D) != 0) + continue; + + b2 = buf[i + 2]; + b3 = buf[i + 3]; + + addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1); + addr -= s->pos + (uint32_t)i; + + buf[i + 1] = (uint8_t)((b1 & 0x0F) + | ((addr >> 8) & 0xF0)); + + buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F) + | ((addr >> 7) & 0x10) + | ((addr << 4) & 0xE0)); + + buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F) + | ((addr >> 13) & 0x80)); + + i += 4 - 2; + + } else if ((instr & 0x7F) == 0x17) { + /* AUIPC */ + instr |= (uint32_t)buf[i + 1] << 8; + instr |= (uint32_t)buf[i + 2] << 16; + instr |= (uint32_t)buf[i + 3] << 24; + + if (instr & 0xE80) { + /* AUIPC's rd doesn't equal x0 or x2. */ + instr2 = get_unaligned_le32(buf + i + 4); + + if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) { + i += 6 - 2; + continue; + } + + addr = (instr & 0xFFFFF000) + (instr2 >> 20); + + instr = 0x17 | (2 << 7) | (instr2 << 12); + instr2 = addr; + } else { + /* AUIPC's rd equals x0 or x2. */ + instr2_rs1 = instr >> 27; + + if ((uint32_t)((instr - 0x3117) << 18) + >= (instr2_rs1 & 0x1D)) { + i += 4 - 2; + continue; + } + + addr = get_unaligned_be32(buf + i + 4); + addr -= s->pos + (uint32_t)i; + + instr2 = (instr >> 12) | (addr << 20); + + instr = 0x17 | (instr2_rs1 << 7) + | ((addr + 0x800) & 0xFFFFF000); + } + + put_unaligned_le32(instr, buf + i); + put_unaligned_le32(instr2, buf + i + 4); + + i += 8 - 2; + } + } + + return i; +} +#endif + /* * Apply the selected BCJ filter. Update *pos and s->pos to match the amount * of data that got filtered. 
@@ -439,6 +533,11 @@ static void bcj_apply(struct xz_dec_bcj *s, case BCJ_ARM64: filtered = bcj_arm64(s, buf, size); break; +#endif +#ifdef XZ_DEC_RISCV + case BCJ_RISCV: + filtered = bcj_riscv(s, buf, size); + break; #endif default: /* Never reached but silence compiler warnings. */ @@ -615,6 +714,9 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) #endif #ifdef XZ_DEC_ARM64 case BCJ_ARM64: +#endif +#ifdef XZ_DEC_RISCV + case BCJ_RISCV: #endif break; diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c index 495d2cc2e6e8..f40817d65897 100644 --- a/lib/xz/xz_dec_syms.c +++ b/lib/xz/xz_dec_syms.c @@ -22,6 +22,6 @@ EXPORT_SYMBOL(xz_dec_microlzma_end); #endif MODULE_DESCRIPTION("XZ decompressor"); -MODULE_VERSION("1.1"); +MODULE_VERSION("1.2"); MODULE_AUTHOR("Lasse Collin and Igor Pavlov"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 307e0de8c260..a8b1cbe8d21d 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -39,6 +39,9 @@ # ifdef CONFIG_XZ_DEC_ARM64 # define XZ_DEC_ARM64 # endif +# ifdef CONFIG_XZ_DEC_RISCV +# define XZ_DEC_RISCV +# endif # ifdef CONFIG_XZ_DEC_MICROLZMA # define XZ_DEC_MICROLZMA # endif @@ -102,7 +105,8 @@ # if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ || defined(XZ_DEC_IA64) \ || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ - || defined(XZ_DEC_SPARC) || defined(XZ_DEC_ARM64) + || defined(XZ_DEC_SPARC) || defined(XZ_DEC_ARM64) \ + || defined(XZ_DEC_RISCV) # define XZ_DEC_BCJ # endif #endif From 8653c909922743bceb4800e5cc26087208c9e0e6 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:28 +0300 Subject: [PATCH 018/103] xz: use 128 MiB dictionary and force single-threaded mode This only affects kernel image compression, not any other xz usage. Desktop kernels on x86-64 are already around 60 MiB. Using a dictionary larger than 32 MiB should have no downsides nowadays as anyone building the kernel should have plenty of RAM. 
128 MiB dictionary needs 1346 MiB of RAM with xz versions 5.0.x - 5.6.x in single-threaded mode. On archs that use xz_wrap.sh, kernel decompression is done in single-call mode so a larger dictionary doesn't affect boot-time memory requirements. xz >= 5.6.0 uses multithreaded mode by default which compresses slightly worse than single-threaded mode. Kernel compression rarely used more than one thread anyway because with 32 MiB dictionary size the default block size was 96 MiB in multithreaded mode. So only a single thread was used anyway unless the kernel was over 96 MiB. Comparison to CONFIG_KERNEL_LZMA: It uses "lzma -9" which mapped to 32 MiB dictionary in LZMA Utils 4.32.7 (the final release in 2008). Nowadays the lzma tool on most systems is from XZ Utils where -9 maps to 64 MiB dictionary. So using a 32 MiB dictionary with CONFIG_KERNEL_XZ may have compressed big kernels slightly worse than the old LZMA option. Comparison to CONFIG_KERNEL_ZSTD: zstd uses 128 MiB dictionary. Link: https://lkml.kernel.org/r/20240721133633.47721-14-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- scripts/xz_wrap.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scripts/xz_wrap.sh b/scripts/xz_wrap.sh index bb760b721b2c..c8c36441ab70 100755 --- a/scripts/xz_wrap.sh +++ b/scripts/xz_wrap.sh @@ -16,4 +16,15 @@ case $SRCARCH in sparc) BCJ=--sparc ;; esac -exec $XZ --check=crc32 $BCJ --lzma2=$LZMA2OPTS,dict=32MiB +# Use single-threaded mode because it compresses a little better +# (and uses less RAM) than multithreaded mode. 
+# +# For the best compression, the dictionary size shouldn't be +# smaller than the uncompressed kernel. 128 MiB dictionary +# needs less than 1400 MiB of RAM in single-threaded mode. +# +# On the archs that use this script to compress the kernel, +# decompression in the preboot code is done in single-call mode. +# Thus the dictionary size doesn't affect the memory requirements +# of the preboot decompressor at all. +exec $XZ --check=crc32 --threads=1 $BCJ --lzma2=$LZMA2OPTS,dict=128MiB From 7472ff8adad8655f38b060a602f66e59c93c4793 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:29 +0300 Subject: [PATCH 019/103] xz: adjust arch-specific options for better kernel compression Use LZMA2 options that match the arch-specific alignment of instructions. This change reduces compressed kernel size 0-2 % depending on the arch. On 1-byte-aligned x86 it makes no difference and on 4-byte-aligned archs it helps the most. Use the ARM-Thumb filter for ARM-Thumb2 kernels. This reduces compressed kernel size about 5 %.[1] Previously such kernels were compressed using the ARM filter which didn't do anything useful with ARM-Thumb2 code. Add BCJ filter support for ARM64 and RISC-V. Compared to unfiltered XZ or plain LZMA, the compressed kernel size is reduced about 5 % on ARM64 and 7 % on RISC-V. A new enough version of the xz tool is required: 5.4.0 for ARM64 and 5.6.0 for RISC-V. With an old xz version, a message is printed to standard error and the kernel is compressed without the filter. Update lib/decompress_unxz.c to match the changes to xz_wrap.sh. Update the CONFIG_KERNEL_XZ help text in init/Kconfig: - Add the RISC-V and ARM64 filters. - Clarify that the PowerPC filter is for big endian only. - Omit IA-64. 
Link: https://lore.kernel.org/lkml/1637379771-39449-1-git-send-email-zhongjubin@huawei.com/ [1] Link: https://lkml.kernel.org/r/20240721133633.47721-15-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Sam James Cc: Simon Glass Cc: Catalin Marinas Cc: Will Deacon Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Jubin Zhong Cc: Jules Maselbas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Randy Dunlap Cc: Rui Li Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- init/Kconfig | 5 +- lib/decompress_unxz.c | 14 +++- scripts/xz_wrap.sh | 144 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 153 insertions(+), 10 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 5783a0b87517..583cb07176a9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -310,8 +310,9 @@ config KERNEL_XZ BCJ filters which can improve compression ratio of executable code. The size of the kernel is about 30% smaller with XZ in comparison to gzip. On architectures for which there is a BCJ - filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ - will create a few percent smaller kernel than plain LZMA. + filter (i386, x86_64, ARM, ARM64, RISC-V, big endian PowerPC, + and SPARC), XZ will create a few percent smaller kernel than + plain LZMA. 
The speed is about the same as with LZMA: The decompression speed of XZ is better than that of bzip2 but worse than gzip diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 46aa3be13fc5..cae00395d7a6 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -126,11 +126,21 @@ #ifdef CONFIG_X86 # define XZ_DEC_X86 #endif -#ifdef CONFIG_PPC +#if defined(CONFIG_PPC) && defined(CONFIG_CPU_BIG_ENDIAN) # define XZ_DEC_POWERPC #endif #ifdef CONFIG_ARM -# define XZ_DEC_ARM +# ifdef CONFIG_THUMB2_KERNEL +# define XZ_DEC_ARMTHUMB +# else +# define XZ_DEC_ARM +# endif +#endif +#ifdef CONFIG_ARM64 +# define XZ_DEC_ARM64 +#endif +#ifdef CONFIG_RISCV +# define XZ_DEC_RISCV #endif #ifdef CONFIG_SPARC # define XZ_DEC_SPARC diff --git a/scripts/xz_wrap.sh b/scripts/xz_wrap.sh index c8c36441ab70..f19369687030 100755 --- a/scripts/xz_wrap.sh +++ b/scripts/xz_wrap.sh @@ -6,14 +6,146 @@ # # Author: Lasse Collin -BCJ= -LZMA2OPTS= +# This has specialized settings for the following archs. However, +# XZ-compressed kernel isn't currently supported on every listed arch. +# +# Arch Align Notes +# arm 2/4 ARM and ARM-Thumb2 +# arm64 4 +# csky 2 +# loongarch 4 +# mips 2/4 MicroMIPS is 2-byte aligned +# parisc 4 +# powerpc 4 Uses its own wrapper for compressors instead of this. +# riscv 2/4 +# s390 2 +# sh 2 +# sparc 4 +# x86 1 +# A few archs use 2-byte or 4-byte aligned instructions depending on +# the kernel config. This function is used to check if the relevant +# config option is set to "y". +is_enabled() +{ + grep -q "^$1=y$" include/config/auto.conf +} + +# XZ_VERSION is needed to disable features that aren't available in +# old XZ Utils versions. +XZ_VERSION=$($XZ --robot --version) || exit +XZ_VERSION=$(printf '%s\n' "$XZ_VERSION" | sed -n 's/^XZ_VERSION=//p') + +# Assume that no BCJ filter is available. +BCJ= + +# Set the instruction alignment to 1, 2, or 4 bytes. +# +# Set the BCJ filter if one is available. 
+# It must match the #ifdef usage in lib/decompress_unxz.c. case $SRCARCH in - x86) BCJ=--x86 ;; - powerpc) BCJ=--powerpc ;; - arm) BCJ=--arm ;; - sparc) BCJ=--sparc ;; + arm) + if is_enabled CONFIG_THUMB2_KERNEL; then + ALIGN=2 + BCJ=--armthumb + else + ALIGN=4 + BCJ=--arm + fi + ;; + + arm64) + ALIGN=4 + + # ARM64 filter was added in XZ Utils 5.4.0. + if [ "$XZ_VERSION" -ge 50040002 ]; then + BCJ=--arm64 + else + echo "$0: Upgrading to xz >= 5.4.0" \ + "would enable the ARM64 filter" \ + "for better compression" >&2 + fi + ;; + + csky) + ALIGN=2 + ;; + + loongarch) + ALIGN=4 + ;; + + mips) + if is_enabled CONFIG_CPU_MICROMIPS; then + ALIGN=2 + else + ALIGN=4 + fi + ;; + + parisc) + ALIGN=4 + ;; + + powerpc) + ALIGN=4 + + # The filter is only for big endian instruction encoding. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + BCJ=--powerpc + fi + ;; + + riscv) + if is_enabled CONFIG_RISCV_ISA_C; then + ALIGN=2 + else + ALIGN=4 + fi + + # RISC-V filter was added in XZ Utils 5.6.0. + if [ "$XZ_VERSION" -ge 50060002 ]; then + BCJ=--riscv + else + echo "$0: Upgrading to xz >= 5.6.0" \ + "would enable the RISC-V filter" \ + "for better compression" >&2 + fi + ;; + + s390) + ALIGN=2 + ;; + + sh) + ALIGN=2 + ;; + + sparc) + ALIGN=4 + BCJ=--sparc + ;; + + x86) + ALIGN=1 + BCJ=--x86 + ;; + + *) + echo "$0: Arch-specific tuning is missing for '$SRCARCH'" >&2 + + # Guess 2-byte-aligned instructions. Guessing too low + # should hurt less than guessing too high. + ALIGN=2 + ;; +esac + +# Select the LZMA2 options matching the instruction alignment. 
+case $ALIGN in + 1) LZMA2OPTS= ;; + 2) LZMA2OPTS=lp=1 ;; + 4) LZMA2OPTS=lp=2,lc=2 ;; + *) echo "$0: ALIGN wrong or missing" >&2; exit 1 ;; esac # Use single-threaded mode because it compresses a little better From 181e71f6626ce04122ed04fa5f4de6726c1ac848 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:30 +0300 Subject: [PATCH 020/103] arm64: boot: add Image.xz support The Image.* targets existed for other compressors already. Bootloader support is needed for decompression. This is for CONFIG_EFI_ZBOOT=n. With CONFIG_EFI_ZBOOT=y, XZ was already available. Link: https://lkml.kernel.org/r/20240721133633.47721-16-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Cc: Simon Glass Cc: Catalin Marinas Cc: Will Deacon Cc: Jules Maselbas Cc: Albert Ou Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Sam James Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/arm64/boot/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 607a67a649c4..b5a08333bc57 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -17,7 +17,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo \ - Image.zst image.fit + Image.zst Image.xz image.fit $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -40,6 +40,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/Image.zst: $(obj)/Image FORCE $(call if_changed,zstd) +$(obj)/Image.xz: $(obj)/Image FORCE + $(call if_changed,xzkern) + $(obj)/image.fit: $(obj)/Image $(obj)/dts/dtbs-list FORCE $(call if_changed,fit) From ab4ce9831a8e3158ec70b3c8608b94101600d551 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 21 Jul 2024 16:36:31 +0300 Subject: 
[PATCH 021/103] riscv: boot: add Image.xz support The Image.* targets existed for other compressors already. Bootloader support is needed for decompression. This is for CONFIG_EFI_ZBOOT=n. With CONFIG_EFI_ZBOOT=y, XZ was already available. Comparison with Linux 6.10 RV64GC tinyconfig (in KiB): 1027 Image 594 Image.gz 541 Image.zst 510 Image.lzma 474 Image.xz Link: https://lkml.kernel.org/r/20240721133633.47721-17-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Reviewed-by: Emil Renner Berthing Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Jules Maselbas Cc: Catalin Marinas Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jonathan Corbet Cc: Jubin Zhong Cc: Krzysztof Kozlowski Cc: Michael Ellerman Cc: Randy Dunlap Cc: Rui Li Cc: Sam James Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/riscv/Kconfig | 1 + arch/riscv/Makefile | 6 ++++-- arch/riscv/boot/Makefile | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0f3cd7c3a436..5a2cce767628 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -156,6 +156,7 @@ config RISCV select HAVE_KERNEL_LZO if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KERNEL_UNCOMPRESSED if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KERNEL_ZSTD if !XIP_KERNEL && !EFI_ZBOOT + select HAVE_KERNEL_XZ if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KPROBES if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL # https://github.com/ClangBuiltLinux/linux/issues/1881 diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6fe682139d2e..d469db9f46f4 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -159,6 +159,7 @@ boot-image-$(CONFIG_KERNEL_LZ4) := Image.lz4 boot-image-$(CONFIG_KERNEL_LZMA) := Image.lzma boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst +boot-image-$(CONFIG_KERNEL_XZ) := Image.xz ifdef CONFIG_RISCV_M_MODE boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin endif @@
-183,12 +184,12 @@ endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg -BOOT_TARGETS := Image Image.gz Image.bz2 Image.lz4 Image.lzma Image.lzo Image.zst loader loader.bin xipImage vmlinuz.efi +BOOT_TARGETS := Image Image.gz Image.bz2 Image.lz4 Image.lzma Image.lzo Image.zst Image.xz loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) loader.bin: loader -Image.gz Image.bz2 Image.lz4 Image.lzma Image.lzo Image.zst loader xipImage vmlinuz.efi: Image +Image.gz Image.bz2 Image.lz4 Image.lzma Image.lzo Image.zst Image.xz loader xipImage vmlinuz.efi: Image $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @@ -225,6 +226,7 @@ define archhelp echo ' Image.lzma - Compressed kernel image (arch/riscv/boot/Image.lzma)' echo ' Image.lzo - Compressed kernel image (arch/riscv/boot/Image.lzo)' echo ' Image.zst - Compressed kernel image (arch/riscv/boot/Image.zst)' + echo ' Image.xz - Compressed kernel image (arch/riscv/boot/Image.xz)' echo ' vmlinuz.efi - Compressed EFI kernel image (arch/riscv/boot/vmlinuz.efi)' echo ' Default when CONFIG_EFI_ZBOOT=y' echo ' xipImage - Execute-in-place kernel image (arch/riscv/boot/xipImage)' diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 4e9e7a28bf9b..b25d524ce5eb 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -64,6 +64,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/Image.zst: $(obj)/Image FORCE $(call if_changed,zstd) +$(obj)/Image.xz: $(obj)/Image FORCE + $(call if_changed,xzkern) + $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) From c6f371bab25edccd39caa5dd452b50d9dfdf4ff0 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Wed, 24 Jul 2024 14:05:41 +0300 Subject: [PATCH 022/103] xz: remove XZ_EXTERN and extern from functions XZ_EXTERN was used to make internal functions static in the preboot code. However, in other decompressors this hasn't been done. 
On x86-64, this makes no difference to the kernel image size. Omit XZ_EXTERN and let some of the internal functions be extern in the preboot code. Omitting XZ_EXTERN from include/linux/xz.h fixes warnings in "make htmldocs" and makes the intradocument links to xz_dec functions work in Documentation/staging/xz.rst. The alternative would have been to add "XZ_EXTERN" to c_id_attributes in Documentation/conf.py but omitting XZ_EXTERN seemed cleaner. Link: https://lore.kernel.org/lkml/20240723205437.3c0664b0@kaneli/ Link: https://lkml.kernel.org/r/20240724110544.16430-1-lasse.collin@tukaani.org Signed-off-by: Lasse Collin Tested-by: Michael Ellerman (powerpc) Cc: Jonathan Corbet Cc: Sam James Cc: Albert Ou Cc: Catalin Marinas Cc: Emil Renner Berthing Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: Joel Stanley Cc: Jubin Zhong Cc: Jules Maselbas Cc: Krzysztof Kozlowski Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Randy Dunlap Cc: Rui Li Cc: Simon Glass Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- Documentation/staging/xz.rst | 3 --- arch/powerpc/boot/xz_config.h | 3 --- include/linux/xz.h | 35 ++++++++++++----------------------- lib/decompress_unxz.c | 1 - lib/xz/xz_crc32.c | 4 ++-- lib/xz/xz_dec_bcj.c | 9 ++++----- lib/xz/xz_dec_lzma2.c | 10 ++++------ lib/xz/xz_dec_stream.c | 8 ++++---- lib/xz/xz_private.h | 20 ++++++++------------ 9 files changed, 34 insertions(+), 59 deletions(-) diff --git a/Documentation/staging/xz.rst b/Documentation/staging/xz.rst index e1054e9a8e65..6953a189e5f2 100644 --- a/Documentation/staging/xz.rst +++ b/Documentation/staging/xz.rst @@ -95,7 +95,4 @@ xz_dec API This is available with ``#include ``. -``XZ_EXTERN`` is a macro used in the preboot code. Ignore it when -reading this documentation. - .. 
kernel-doc:: include/linux/xz.h diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h index ebfadd39e192..9506a96ebbcc 100644 --- a/arch/powerpc/boot/xz_config.h +++ b/arch/powerpc/boot/xz_config.h @@ -50,11 +50,8 @@ static inline void put_unaligned_be32(u32 val, void *p) /* prevent the inclusion of the xz-preboot MM headers */ #define DECOMPR_MM_H #define memmove memmove -#define XZ_EXTERN static /* xz.h needs to be included directly since we need enum xz_mode */ #include "../../../include/linux/xz.h" -#undef XZ_EXTERN - #endif diff --git a/include/linux/xz.h b/include/linux/xz.h index 701d62c02b9a..58ae1d746c6f 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -18,11 +18,6 @@ # include #endif -/* In Linux, this is used to make extern functions static when needed. */ -#ifndef XZ_EXTERN -# define XZ_EXTERN extern -#endif - /** * enum xz_mode - Operation mode * @@ -190,7 +185,7 @@ struct xz_dec; * ready to be used with xz_dec_run(). If memory allocation fails, * xz_dec_init() returns NULL. */ -XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); +struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); /** * xz_dec_run() - Run the XZ decoder @@ -210,7 +205,7 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); * get that amount valid data from the beginning of the stream. You must use * the multi-call decoder if you don't want to uncompress the whole stream. */ -XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); +enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); /** * xz_dec_reset() - Reset an already allocated decoder state @@ -223,14 +218,14 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in * multi-call mode. 
*/ -XZ_EXTERN void xz_dec_reset(struct xz_dec *s); +void xz_dec_reset(struct xz_dec *s); /** * xz_dec_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_init(). If s is NULL, * this function does nothing. */ -XZ_EXTERN void xz_dec_end(struct xz_dec *s); +void xz_dec_end(struct xz_dec *s); /** * DOC: MicroLZMA decompressor @@ -244,10 +239,6 @@ XZ_EXTERN void xz_dec_end(struct xz_dec *s); * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream * marker must not be used. The unused values are reserved for future use. - * - * These functions aren't used or available in preboot code and thus aren't - * marked with XZ_EXTERN. This avoids warnings about static functions that - * are never defined. */ /* @@ -272,8 +263,8 @@ struct xz_dec_microlzma; * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. */ -extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, - uint32_t dict_size); +struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state @@ -289,9 +280,8 @@ extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. */ -extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, - uint32_t comp_size, uint32_t uncomp_size, - int uncomp_size_is_exact); +void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, + uint32_t uncomp_size, int uncomp_size_is_exact); /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder @@ -329,15 +319,14 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, * may be changed normally like with XZ_PREALLOC. This way input data can be * provided from non-contiguous memory. 
*/ -extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, - struct xz_buf *b); +enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, struct xz_buf *b); /** * xz_dec_microlzma_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_microlzma_alloc(). * If s is NULL, this function does nothing. */ -extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); +void xz_dec_microlzma_end(struct xz_dec_microlzma *s); /* * Standalone build (userspace build or in-kernel build for boot time use) @@ -358,13 +347,13 @@ extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); * This must be called before any other xz_* function to initialize * the CRC32 lookup table. */ -XZ_EXTERN void xz_crc32_init(void); +void xz_crc32_init(void); /* * Update CRC32 value using the polynomial from IEEE-802.3. To start a new * calculation, the third argument must be zero. To continue the calculation, * the previously returned value is passed as the third argument. 
*/ -XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); +uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); #endif #endif diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index cae00395d7a6..32138bb8ef77 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -107,7 +107,6 @@ #ifdef __KERNEL__ # include #endif -#define XZ_EXTERN STATIC #ifndef XZ_PREBOOT # include diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index effdf34ec48d..6a7906a328ba 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -26,7 +26,7 @@ STATIC_RW_DATA uint32_t xz_crc32_table[256]; -XZ_EXTERN void xz_crc32_init(void) +void xz_crc32_init(void) { const uint32_t poly = 0xEDB88320; @@ -45,7 +45,7 @@ XZ_EXTERN void xz_crc32_init(void) return; } -XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) +uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) { crc = ~crc; diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index 42d7f268726f..8237db17eee3 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -572,9 +572,8 @@ static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) * data in chunks of 1-16 bytes. To hide this issue, this function does * some buffering. 
*/ -XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, - struct xz_dec_lzma2 *lzma2, - struct xz_buf *b) +enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2, + struct xz_buf *b) { size_t out_start; @@ -682,7 +681,7 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, return s->ret; } -XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) +struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) { struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s != NULL) @@ -691,7 +690,7 @@ XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) return s; } -XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) +enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) { switch (id) { #ifdef XZ_DEC_X86 diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 613939f5dd6c..83bb66b6016d 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -960,8 +960,7 @@ static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) * Take care of the LZMA2 control layer, and forward the job of actual LZMA * decoding or copying of uncompressed chunks to other functions. 
*/ -XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, - struct xz_buf *b) +enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b) { uint32_t tmp; @@ -1137,8 +1136,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, return XZ_OK; } -XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, - uint32_t dict_max) +struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, uint32_t dict_max) { struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) @@ -1161,7 +1159,7 @@ XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, return s; } -XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) +enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) { /* This limits dictionary size to 3 GiB to keep parsing simpler. */ if (props > 39) @@ -1197,7 +1195,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) return XZ_OK; } -XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) +void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) { if (DEC_IS_MULTI(s->dict.mode)) vfree(s->dict.buf); diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c index 0058406ccd17..f9d003684d56 100644 --- a/lib/xz/xz_dec_stream.c +++ b/lib/xz/xz_dec_stream.c @@ -746,7 +746,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) * actually succeeds (that's the price to pay of using the output buffer as * the workspace). 
*/ -XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) +enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) { size_t in_start; size_t out_start; @@ -782,7 +782,7 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) return ret; } -XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) +struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) { struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) @@ -812,7 +812,7 @@ error_bcj: return NULL; } -XZ_EXTERN void xz_dec_reset(struct xz_dec *s) +void xz_dec_reset(struct xz_dec *s) { s->sequence = SEQ_STREAM_HEADER; s->allow_buf_error = false; @@ -824,7 +824,7 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s) s->temp.size = STREAM_HEADER_SIZE; } -XZ_EXTERN void xz_dec_end(struct xz_dec *s) +void xz_dec_end(struct xz_dec *s) { if (s != NULL) { xz_dec_lzma2_end(s->lzma2); diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index a8b1cbe8d21d..5f1294a1408c 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -115,8 +115,7 @@ * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used * before calling xz_dec_lzma2_run(). */ -XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, - uint32_t dict_max); +struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, uint32_t dict_max); /* * Decode the LZMA2 properties (one byte) and reset the decoder. Return @@ -124,22 +123,20 @@ XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, * big enough, and XZ_OPTIONS_ERROR if props indicates something that this * decoder doesn't support. */ -XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, - uint8_t props); +enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props); /* Decode raw LZMA2 stream from b->in to b->out. 
*/ -XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, - struct xz_buf *b); +enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b); /* Free the memory allocated for the LZMA2 decoder. */ -XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s); +void xz_dec_lzma2_end(struct xz_dec_lzma2 *s); #ifdef XZ_DEC_BCJ /* * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before * calling xz_dec_bcj_run(). */ -XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); +struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); /* * Decode the Filter ID of a BCJ filter. This implementation doesn't @@ -147,16 +144,15 @@ XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); * is needed. Returns XZ_OK if the given Filter ID is supported. * Otherwise XZ_OPTIONS_ERROR is returned. */ -XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id); +enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id); /* * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() * must be called directly. */ -XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, - struct xz_dec_lzma2 *lzma2, - struct xz_buf *b); +enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2, + struct xz_buf *b); /* Free the memory allocated for the BCJ filters. */ #define xz_dec_bcj_end(s) kfree(s) From d1c7848b58c610bc83f4b05ff0b8244b59f56175 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Tue, 23 Jul 2024 05:11:54 -0400 Subject: [PATCH 023/103] scripts: add macro_checker script to check unused parameters in macros Recently, I saw a patch[1] on the ext4 mailing list regarding the correction of a macro definition error. Jan mentioned that "The bug in the macro is a really nasty trap...". Because existing compilers are unable to detect unused parameters in macro definitions. 
This inspired me to write a script to check for unused parameters in macro definitions and to run it. Surprisingly, the script uncovered numerous issues across various subsystems, including filesystems, drivers, and sound etc. Some of these issues involved parameters that were accepted but never used, for example: #define XFS_DAENTER_DBS(mp,w) \ (XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0)) where mp was unused. Others are actual bugs, for example: #define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(x) \ (ab->hw_params.regs->hal_seq_wcss_umac_ce0_src_reg) #define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(x) \ (ab->hw_params.regs->hal_seq_wcss_umac_ce0_dst_reg) #define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(x) \ (ab->hw_params.regs->hal_seq_wcss_umac_ce1_src_reg) #define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(x) \ (ab->hw_params.regs->hal_seq_wcss_umac_ce1_dst_reg) where x was entirely unused, and instead, a local variable ab was used. I have submitted patches[2-5] to fix some of these issues, but due to the large number, many still remain unaddressed. I believe that the kernel and maintainers would benefit from this script to check for unused parameters in macro definitions. It should be noted that it may cause some false positives in conditional compilation scenarios, such as #ifdef DEBUG static int debug(arg) {}; #else #define debug(arg) #endif So the caller needs to manually verify whether it is a true issue. But this should be fine, because maintainers should only need to review their own subsystems, which typically results in only a few reports.
[1]: https://patchwork.ozlabs.org/project/linux-ext4/patch/1717652596-58760-1-git-send-email-carrionbent@linux.alibaba.com/ [2]: https://lore.kernel.org/linux-xfs/20240721112701.212342-1-sunjunchao2870@gmail.com/ [3]: https://lore.kernel.org/linux-bcachefs/20240721123943.246705-1-sunjunchao2870@gmail.com/ [4]: https://sourceforge.net/p/linux-f2fs/mailman/message/58797811/ [5]: https://sourceforge.net/p/linux-f2fs/mailman/message/58797812/ [sunjunchao2870@gmail.com: reduce false positives] Link: https://lkml.kernel.org/r/20240726031310.254742-1-sunjunchao2870@gmail.com Link: https://lkml.kernel.org/r/20240723091154.52458-1-sunjunchao2870@gmail.com Signed-off-by: Julian Sun Cc: Al Viro Cc: Christian Brauner Cc: Darrick J. Wong Cc: Jan Kara Cc: Junchao Sun Cc: Kalle Valo Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Nicolas Schier Signed-off-by: Andrew Morton --- scripts/macro_checker.py | 131 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100755 scripts/macro_checker.py diff --git a/scripts/macro_checker.py b/scripts/macro_checker.py new file mode 100755 index 000000000000..ba550982e98f --- /dev/null +++ b/scripts/macro_checker.py @@ -0,0 +1,131 @@ +#!/usr/bin/python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Julian Sun + +""" Find macro definitions with unused parameters. 
""" + +import argparse +import os +import re + +parser = argparse.ArgumentParser() + +parser.add_argument("path", type=str, help="The file or dir path that needs check") +parser.add_argument("-v", "--verbose", action="store_true", + help="Check conditional macros, but may lead to more false positives") +args = parser.parse_args() + +macro_pattern = r"#define\s+(\w+)\(([^)]*)\)" +# below vars were used to reduce false positives +fp_patterns = [r"\s*do\s*\{\s*\}\s*while\s*\(\s*0\s*\)", + r"\(?0\)?", r"\(?1\)?"] +correct_macros = [] +cond_compile_mark = "#if" +cond_compile_end = "#endif" + +def check_macro(macro_line, report): + match = re.match(macro_pattern, macro_line) + if match: + macro_def = re.sub(macro_pattern, '', macro_line) + identifier = match.group(1) + content = match.group(2) + arguments = [item.strip() for item in content.split(',') if item.strip()] + + macro_def = macro_def.strip() + if not macro_def: + return + # used to reduce false positives, like #define endfor_nexthops(rt) } + if len(macro_def) == 1: + return + + for fp_pattern in fp_patterns: + if (re.match(fp_pattern, macro_def)): + return + + for arg in arguments: + # used to reduce false positives + if "..." in arg: + return + for arg in arguments: + if not arg in macro_def and report == False: + return + # if there is a correct macro with the same name, do not report it. 
+ if not arg in macro_def and identifier not in correct_macros: + print(f"Argument {arg} is not used in function-line macro {identifier}") + return + + correct_macros.append(identifier) + + +# remove comment and whitespace +def macro_strip(macro): + comment_pattern1 = r"\/\/*" + comment_pattern2 = r"\/\**\*\/" + + macro = macro.strip() + macro = re.sub(comment_pattern1, '', macro) + macro = re.sub(comment_pattern2, '', macro) + + return macro + +def file_check_macro(file_path, report): + # number of conditional compiling + cond_compile = 0 + # only check .c and .h file + if not file_path.endswith(".c") and not file_path.endswith(".h"): + return + + with open(file_path, "r") as f: + while True: + line = f.readline() + if not line: + break + line = line.strip() + if line.startswith(cond_compile_mark): + cond_compile += 1 + continue + if line.startswith(cond_compile_end): + cond_compile -= 1 + continue + + macro = re.match(macro_pattern, line) + if macro: + macro = macro_strip(macro.string) + while macro[-1] == '\\': + macro = macro[0:-1] + macro = macro.strip() + macro += f.readline() + macro = macro_strip(macro) + if not args.verbose: + if file_path.endswith(".c") and cond_compile != 0: + continue + # 1 is for #ifdef xxx at the beginning of the header file + if file_path.endswith(".h") and cond_compile != 1: + continue + check_macro(macro, report) + +def get_correct_macros(path): + file_check_macro(path, False) + +def dir_check_macro(dir_path): + + for dentry in os.listdir(dir_path): + path = os.path.join(dir_path, dentry) + if os.path.isdir(path): + dir_check_macro(path) + elif os.path.isfile(path): + get_correct_macros(path) + file_check_macro(path, True) + + +def main(): + if os.path.isfile(args.path): + get_correct_macros(args.path) + file_check_macro(args.path, True) + elif os.path.isdir(args.path): + dir_check_macro(args.path) + else: + print(f"{args.path} doesn't exit or is neither a file nor a dir") + +if __name__ == "__main__": + main() \ No newline at end 
of file From a633a4b8001a7f2a12584f267a3280990d9ababa Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Tue, 23 Jul 2024 14:48:57 +0800 Subject: [PATCH 024/103] scripts/gdb: fix timerlist parsing issue Patch series "Fix some GDB command error and add some GDB commands", v3. Fix some GDB command errors and add some useful GDB commands. This patch (of 5): Commit a478ffb2ae23 ("tick: Move individual bit features to debuggable mask accesses") and commit 7988e5ae2be7 ("tick: Split nohz and highres features from nohz_mode") moved 'tick_stopped' and 'nohz_mode' to the flags field, which breaks the gdb lx-timerlist command: (gdb) lx-timerlist Python Exception : There is no member named nohz_mode. Error occurred in Python: There is no member named nohz_mode. (gdb) lx-timerlist Python Exception : There is no member named tick_stopped. Error occurred in Python: There is no member named tick_stopped. We read 'tick_stopped' and 'nohz_mode' from the flags field instead. Link: https://lkml.kernel.org/r/20240723064902.124154-1-kuan-ying.lee@canonical.com Link: https://lkml.kernel.org/r/20240723064902.124154-2-kuan-ying.lee@canonical.com Fixes: a478ffb2ae23 ("tick: Move individual bit features to debuggable mask accesses") Fixes: 7988e5ae2be7 ("tick: Split nohz and highres features from nohz_mode") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/timerlist.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py index 64bc87191003..98445671fe83 100644 --- a/scripts/gdb/linux/timerlist.py +++ b/scripts/gdb/linux/timerlist.py @@ -87,21 +87,22 @@ def print_cpu(hrtimer_bases, cpu, max_clock_bases): text += "\n" if constants.LX_CONFIG_TICK_ONESHOT: - fmts = [(" .{} : {}", 'nohz_mode'), - (" .{} : {} nsecs", 'last_tick'), - (" .{} : {}", 'tick_stopped'), - (" .{} : {}", 'idle_jiffies'), - (" .{} : {}", 'idle_calls'), - (" .{} :
{}", 'idle_sleeps'), - (" .{} : {} nsecs", 'idle_entrytime'), - (" .{} : {} nsecs", 'idle_waketime'), - (" .{} : {} nsecs", 'idle_exittime'), - (" .{} : {} nsecs", 'idle_sleeptime'), - (" .{}: {} nsecs", 'iowait_sleeptime'), - (" .{} : {}", 'last_jiffies'), - (" .{} : {}", 'next_timer'), - (" .{} : {} nsecs", 'idle_expires')] - text += "\n".join([s.format(f, ts[f]) for s, f in fmts]) + TS_FLAG_STOPPED = 1 << 1 + TS_FLAG_NOHZ = 1 << 4 + text += f" .{'nohz':15s}: {int(bool(ts['flags'] & TS_FLAG_NOHZ))}\n" + text += f" .{'last_tick':15s}: {ts['last_tick']}\n" + text += f" .{'tick_stopped':15s}: {int(bool(ts['flags'] & TS_FLAG_STOPPED))}\n" + text += f" .{'idle_jiffies':15s}: {ts['idle_jiffies']}\n" + text += f" .{'idle_calls':15s}: {ts['idle_calls']}\n" + text += f" .{'idle_sleeps':15s}: {ts['idle_sleeps']}\n" + text += f" .{'idle_entrytime':15s}: {ts['idle_entrytime']} nsecs\n" + text += f" .{'idle_waketime':15s}: {ts['idle_waketime']} nsecs\n" + text += f" .{'idle_exittime':15s}: {ts['idle_exittime']} nsecs\n" + text += f" .{'idle_sleeptime':15s}: {ts['idle_sleeptime']} nsecs\n" + text += f" .{'iowait_sleeptime':15s}: {ts['iowait_sleeptime']} nsecs\n" + text += f" .{'last_jiffies':15s}: {ts['last_jiffies']}\n" + text += f" .{'next_timer':15s}: {ts['next_timer']}\n" + text += f" .{'idle_expires':15s}: {ts['idle_expires']} nsecs\n" text += "\njiffies: {}\n".format(jiffies) text += "\n" From 0c77e103c45fa1b119f5d3bb4625eee081c1a6cf Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Tue, 23 Jul 2024 14:48:58 +0800 Subject: [PATCH 025/103] scripts/gdb: add iteration function for rbtree Add inorder iteration function for rbtree usage. This is a preparation patch for the next patch to fix the gdb mounts issue. 
Link: https://lkml.kernel.org/r/20240723064902.124154-3-kuan-ying.lee@canonical.com Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/rbtree.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/gdb/linux/rbtree.py b/scripts/gdb/linux/rbtree.py index fe462855eefd..fcbcc5f4153c 100644 --- a/scripts/gdb/linux/rbtree.py +++ b/scripts/gdb/linux/rbtree.py @@ -9,6 +9,18 @@ from linux import utils rb_root_type = utils.CachedType("struct rb_root") rb_node_type = utils.CachedType("struct rb_node") +def rb_inorder_for_each(root): + def inorder(node): + if node: + yield from inorder(node['rb_left']) + yield node + yield from inorder(node['rb_right']) + + yield from inorder(root['rb_node']) + +def rb_inorder_for_each_entry(root, gdbtype, member): + for node in rb_inorder_for_each(root): + yield utils.container_of(node, gdbtype, member) def rb_first(root): if root.type == rb_root_type.get_type(): From 4b183f613924ad536be2f8bd12b307e9c5a96bf6 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Tue, 23 Jul 2024 14:48:59 +0800 Subject: [PATCH 026/103] scripts/gdb: fix lx-mounts command error (gdb) lx-mounts mount super_block devname pathname fstype options Python Exception : There is no member named list. Error occurred in Python: There is no member named list. We encounter the above issue after commit 2eea9ce4310d ("mounts: keep list of mounts in an rbtree"). The commit move a mount from list into rbtree. So we can instead use rbtree to iterate all mounts information. 
Link: https://lkml.kernel.org/r/20240723064902.124154-4-kuan-ying.lee@canonical.com Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/proc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py index 43c687e7a69d..65dd1bd12964 100644 --- a/scripts/gdb/linux/proc.py +++ b/scripts/gdb/linux/proc.py @@ -18,6 +18,7 @@ from linux import utils from linux import tasks from linux import lists from linux import vfs +from linux import rbtree from struct import * @@ -172,8 +173,7 @@ values of that process namespace""" gdb.write("{:^18} {:^15} {:>9} {} {} options\n".format( "mount", "super_block", "devname", "pathname", "fstype")) - for mnt in lists.list_for_each_entry(namespace['list'], - mount_ptr_type, "mnt_list"): + for mnt in rbtree.rb_inorder_for_each_entry(namespace['mounts'], mount_ptr_type, "mnt_node"): devname = mnt['mnt_devname'].string() devname = devname if devname else "none" From 35249f68b5d38bff1c616cc9761ecc3d820163b1 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Tue, 23 Jul 2024 14:49:00 +0800 Subject: [PATCH 027/103] scripts/gdb: add 'lx-stack_depot_lookup' command. This command allows users to quickly retrieve a stacktrace using a handle obtained from a memory coredump. 
Example output: (gdb) lx-stack_depot_lookup 0x00c80300 0xffff8000807965b4 : mov x20, x0 0xffff800081a077d8 : mov x1, x0 0xffff800081a079a0 : cbnz w0, 0xffff800081a07968 0xffff800082f4a3fc : ldr x19, [sp, #16] 0xffff800080a0fb34 : ldp x3, x4, [sp, #96] 0xffff800080a0a550 : ldp x19, x20, [sp, #16] 0xffff8000808e7b40 : mov w5, w0 0xffff800080a0b8ac : mov x23, x0 0xffff800080914a48 : mov x6, x0 0xffff8000809151c4 : ldr x21, [sp, #32] Link: https://lkml.kernel.org/r/20240723064902.124154-5-kuan-ying.lee@canonical.com Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/stackdepot.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/scripts/gdb/linux/stackdepot.py b/scripts/gdb/linux/stackdepot.py index bb3a0f843931..37313a5a51a0 100644 --- a/scripts/gdb/linux/stackdepot.py +++ b/scripts/gdb/linux/stackdepot.py @@ -13,6 +13,13 @@ if constants.LX_CONFIG_STACKDEPOT: stack_record_type = utils.CachedType('struct stack_record') DEPOT_STACK_ALIGN = 4 +def help(): + t = """Usage: lx-stack_depot_lookup [Hex handle value] + Example: + lx-stack_depot_lookup 0x00c80300\n""" + gdb.write("Unrecognized command\n") + raise gdb.GdbError(t) + def stack_depot_fetch(handle): global DEPOT_STACK_ALIGN global stack_record_type @@ -57,3 +64,23 @@ def stack_depot_print(handle): gdb.execute("x /i 0x%x" % (int(entries[i]))) except Exception as e: gdb.write("%s\n" % e) + +class StackDepotLookup(gdb.Command): + """Search backtrace by handle""" + + def __init__(self): + if constants.LX_CONFIG_STACKDEPOT: + super(StackDepotLookup, self).__init__("lx-stack_depot_lookup", gdb.COMMAND_SUPPORT) + + def invoke(self, args, from_tty): + if not constants.LX_CONFIG_STACKDEPOT: + raise gdb.GdbError('CONFIG_STACKDEPOT is not set') + + argv = gdb.string_to_argv(args) + if len(argv) == 1: + handle = int(argv[0], 16) + stack_depot_print(gdb.Value(handle).cast(utils.get_uint_type())) + else: + help() + +StackDepotLookup() From 
0833952c0768daea7d9b6dc59a35bef309234b88 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Tue, 23 Jul 2024 14:49:01 +0800 Subject: [PATCH 028/103] scripts/gdb: add 'lx-kasan_mem_to_shadow' command This command allows users to quickly translate memory address to the kasan shadow memory address. Example output: (gdb) lx-kasan_mem_to_shadow 0xffff000019acc008 shadow addr: 0xffff600003359801 Link: https://lkml.kernel.org/r/20240723064902.124154-6-kuan-ying.lee@canonical.com Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/kasan.py | 44 ++++++++++++++++++++++++++++++++++++++ scripts/gdb/vmlinux-gdb.py | 1 + 2 files changed, 45 insertions(+) create mode 100644 scripts/gdb/linux/kasan.py diff --git a/scripts/gdb/linux/kasan.py b/scripts/gdb/linux/kasan.py new file mode 100644 index 000000000000..56730b3fde0b --- /dev/null +++ b/scripts/gdb/linux/kasan.py @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2024 Canonical Ltd. 
+# +# Authors: +# Kuan-Ying Lee +# + +import gdb +from linux import constants, mm + +def help(): + t = """Usage: lx-kasan_mem_to_shadow [Hex memory addr] + Example: + lx-kasan_mem_to_shadow 0xffff000008eca008\n""" + gdb.write("Unrecognized command\n") + raise gdb.GdbError(t) + +class KasanMemToShadow(gdb.Command): + """Translate memory address to kasan shadow address""" + + p_ops = None + + def __init__(self): + if constants.LX_CONFIG_KASAN_GENERIC or constants.LX_CONFIG_KASAN_SW_TAGS: + super(KasanMemToShadow, self).__init__("lx-kasan_mem_to_shadow", gdb.COMMAND_SUPPORT) + + def invoke(self, args, from_tty): + if not constants.LX_CONFIG_KASAN_GENERIC or constants.LX_CONFIG_KASAN_SW_TAGS: + raise gdb.GdbError('CONFIG_KASAN_GENERIC or CONFIG_KASAN_SW_TAGS is not set') + + argv = gdb.string_to_argv(args) + if len(argv) == 1: + if self.p_ops is None: + self.p_ops = mm.page_ops().ops + addr = int(argv[0], 16) + shadow_addr = self.kasan_mem_to_shadow(addr) + gdb.write('shadow addr: 0x%x\n' % shadow_addr) + else: + help() + def kasan_mem_to_shadow(self, addr): + return (addr >> self.p_ops.KASAN_SHADOW_SCALE_SHIFT) + self.p_ops.KASAN_SHADOW_OFFSET + +KasanMemToShadow() diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index fc53cdf286f1..d4eeed4506fd 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -49,3 +49,4 @@ else: import linux.page_owner import linux.slab import linux.vmalloc + import linux.kasan From 7b76689a021d19a016310bd5da35450641b67966 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 14 Jul 2024 19:33:07 +0200 Subject: [PATCH 029/103] dyndbg: use seq_putc() in ddebug_proc_show() Single characters should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was transformed by using the Coccinelle software. 
Link: https://lkml.kernel.org/r/375b5b4b-6295-419e-bae9-da724a7a682d@web.de Signed-off-by: Markus Elfring Cc: Jason Baron Cc: Jim Cromie Signed-off-by: Andrew Morton --- lib/dynamic_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index f2c5e7910bb1..5a007952f7f2 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -1147,7 +1147,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) iter->table->mod_name, dp->function, ddebug_describe_flags(dp->flags, &flags)); seq_escape_str(m, dp->format, ESCAPE_SPACE, "\t\r\n\""); - seq_puts(m, "\""); + seq_putc(m, '"'); if (dp->class_id != _DPRINTK_CLASS_DFLT) { class = ddebug_class_name(iter, dp); @@ -1156,7 +1156,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) else seq_printf(m, " class unknown, _id:%d", dp->class_id); } - seq_puts(m, "\n"); + seq_putc(m, '\n'); return 0; } From fbe617af697c336db7630762158127eaa5a1d223 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 14 Jul 2024 19:15:09 +0200 Subject: [PATCH 030/103] closures: use seq_putc() in debug_show() A single line break should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was transformed by using the Coccinelle software. 
Link: https://lkml.kernel.org/r/e7faa2c4-9590-44b4-8669-69ef810277b1@web.de Signed-off-by: Markus Elfring Cc: Kent Overstreet Signed-off-by: Andrew Morton --- lib/closure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/closure.c b/lib/closure.c index 116afae2eed9..2bfe7d2a0048 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -278,7 +278,7 @@ static int debug_show(struct seq_file *f, void *data) seq_printf(f, " W %pS\n", (void *) cl->waiting_on); - seq_puts(f, "\n"); + seq_putc(f, '\n'); } spin_unlock_irq(&closure_list_lock); From 9a42bfd255b288dad2d1a9df0a1fe58394d5da12 Mon Sep 17 00:00:00 2001 From: Deshan Zhang Date: Thu, 25 Jul 2024 17:30:45 +0800 Subject: [PATCH 031/103] lib/lru_cache: fix spelling mistake "colision"->"collision" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a spelling mistake in a literal string and in variable names. Fix these. Link: https://lkml.kernel.org/r/20240725093044.1742842-1-deshan@nfschina.com Signed-off-by: Deshan Zhang Cc: Christoph Böhmwalder Cc: Lars Ellenberg Cc: Philipp Reisner Signed-off-by: Andrew Morton --- include/linux/lru_cache.h | 4 ++-- lib/lru_cache.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index c9afcdd9324c..ff82ef85a084 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -119,7 +119,7 @@ write intent log information, three of which are mentioned here. */ /* this defines an element in a tracked set - * .colision is for hash table lookup. + * .collision is for hash table lookup. * When we process a new IO request, we know its sector, thus can deduce the * region number (label) easily. To do the label -> object lookup without a * full list walk, we use a simple hash table. @@ -145,7 +145,7 @@ write intent log information, three of which are mentioned here. * But it avoids high order page allocations in kmalloc.
*/ struct lc_element { - struct hlist_node colision; + struct hlist_node collision; struct list_head list; /* LRU list or free list */ unsigned refcnt; /* back "pointer" into lc_cache->element[index], diff --git a/lib/lru_cache.c b/lib/lru_cache.c index b3d9187611de..9e0d469c7658 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -243,7 +243,7 @@ static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, BUG_ON(!lc); BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { + hlist_for_each_entry(e, lc_hash_slot(lc, enr), collision) { /* "about to be changed" elements, pending transaction commit, * are hashed by their "new number". "Normal" elements have * lc_number == lc_new_number. */ @@ -303,7 +303,7 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) BUG_ON(e->refcnt); e->lc_number = e->lc_new_number = LC_FREE; - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); list_move(&e->list, &lc->free); RETURN(); } @@ -324,9 +324,9 @@ static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned n PARANOIA_LC_ELEMENT(lc, e); e->lc_new_number = new_number; - if (!hlist_unhashed(&e->colision)) - __hlist_del(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + if (!hlist_unhashed(&e->collision)) + __hlist_del(&e->collision); + hlist_add_head(&e->collision, lc_hash_slot(lc, new_number)); list_move(&e->list, &lc->to_be_changed); return e; From b6e21b71208f289a796d786bd695ec25eae4ed9a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 26 Jul 2024 17:49:46 +0200 Subject: [PATCH 032/103] lib: checksum: use ARRAY_SIZE() to improve assert_setup_correct() Use ARRAY_SIZE() to simplify the assert_setup_correct() function and improve its readability. 
Link: https://lkml.kernel.org/r/20240726154946.230928-1-thorsten.blum@toblux.com Signed-off-by: Thorsten Blum Signed-off-by: Andrew Morton --- lib/checksum_kunit.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 4e4d081a1d3b..be04aa42125c 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -468,12 +468,9 @@ static __wsum to_wsum(u32 x) static void assert_setup_correct(struct kunit *test) { - CHECK_EQ(sizeof(random_buf) / sizeof(random_buf[0]), MAX_LEN); - CHECK_EQ(sizeof(expected_results) / sizeof(expected_results[0]), - MAX_LEN); - CHECK_EQ(sizeof(init_sums_no_overflow) / - sizeof(init_sums_no_overflow[0]), - MAX_LEN); + CHECK_EQ(ARRAY_SIZE(random_buf), MAX_LEN); + CHECK_EQ(ARRAY_SIZE(expected_results), MAX_LEN); + CHECK_EQ(ARRAY_SIZE(init_sums_no_overflow), MAX_LEN); } /* From 00bd8ec2f7cb40e438f5c9eb9ea2110d1ce5e165 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jul 2024 10:40:44 -0700 Subject: [PATCH 033/103] fs/procfs: remove build ID-related code duplication in PROCMAP_QUERY A piece of build ID handling code in PROCMAP_QUERY ioctl() was accidentally duplicated. It wasn't meant to be part of ed5d583a88a9 ("fs/procfs: implement efficient VMA querying API for /proc//maps") commit, which is what introduced duplication. It has no correctness implications, but we unnecessarily perform the same work twice, if build ID parsing is requested. Drop the duplication. 
Link: https://lkml.kernel.org/r/20240729174044.4008399-1-andrii@kernel.org Fixes: ed5d583a88a9 ("fs/procfs: implement efficient VMA querying API for /proc//maps") Signed-off-by: Andrii Nakryiko Reported-by: Jann Horn Cc: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5f171ad7b436..3ba613052506 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -543,21 +543,6 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg) } } - if (karg.build_id_size) { - __u32 build_id_sz; - - err = build_id_parse(vma, build_id_buf, &build_id_sz); - if (err) { - karg.build_id_size = 0; - } else { - if (karg.build_id_size < build_id_sz) { - err = -ENAMETOOLONG; - goto out; - } - karg.build_id_size = build_id_sz; - } - } - if (karg.vma_name_size) { size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size); const struct path *path; From 59d58189f3d96eeb31b0b4a8a8aab2cd6a6afb82 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 29 Jul 2024 19:52:52 +0800 Subject: [PATCH 034/103] crash: fix crash memory reserve exceed system memory bug On x86_32 Qemu machine with 1GB memory, the cmdline "crashkernel=4G" is ok as below: crashkernel reserved: 0x0000000020000000 - 0x0000000120000000 (4096 MB) It's similar on other architectures, such as ARM32 and RISCV32. The cause is that the crash_size is parsed and printed with "unsigned long long" data type which is 8 bytes but allocated used with "phys_addr_t" which is 4 bytes in memblock_phys_alloc_range(). Fix it by checking if crash_size is greater than system RAM size and return error if so. After this patch, there is no above confusing reserve success info. 
Link: https://lkml.kernel.org/r/20240729115252.1659112-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Suggested-by: Mike Rapoport Acked-by: Baoquan He Cc: Albert Ou Cc: Dave Young Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_reserve.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index 64d44a52c011..a620fb4b2116 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -335,6 +335,9 @@ int __init parse_crashkernel(char *cmdline, if (!*crash_size) ret = -EINVAL; + if (*crash_size >= system_ram) + ret = -EINVAL; + return ret; } From 5b9da39dc58abcbdceaf9c2d283d0f64ece5bbdf Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jul 2024 09:08:13 -0700 Subject: [PATCH 035/103] failcmd: add script file in MAINTAINERS failcmd is one of the main interfaces to fault injection framework, but, it is not listed under FAULT INJECTION SUPPORT entry in MAINTAINERS. This is unfortunate, since git-send-email doesn't find emails to send the patches to, forcing the user to try to guess who maintains it. Akinobu Mita seems to be actively maintaining it, so, let's add the file under FAULT INJECTION SUPPORT section. 
Link: https://lkml.kernel.org/r/20240730160814.1979876-1-leitao@debian.org Signed-off-by: Breno Leitao Reviewed-by: Akinobu Mita Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8fab7a347fc0..fa9a13416f18 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8541,6 +8541,7 @@ M: Akinobu Mita S: Supported F: Documentation/fault-injection/ F: lib/fault-inject.c +F: tools/testing/fault-injection/ FBTFT Framebuffer drivers L: dri-devel@lists.freedesktop.org From f6fc302db018f09bb294e918eb0724d6948fa5ba Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 30 Jul 2024 07:43:18 -0700 Subject: [PATCH 036/103] crypto: arm/xor - add missing MODULE_DESCRIPTION() macro Patch series "treewide: add missing MODULE_DESCRIPTION() macros". Since commit 1fffe7a34c89 ("script: modpost: emit a warning when the description is missing"), a module without a MODULE_DESCRIPTION() will result in a warning when built with make W=1. Recently, multiple developers have been eradicating these warnings treewide, and I personally submitted almost 300 patches over the past few months. Almost all of my patches landed by 6.11-rc1, either by being merged in a 6.10-rc or by being merged in the 6.11 merge window. However, a few of my patches did not land. This patch (of 5): With ARCH=arm and CONFIG_KERNEL_MODE_NEON=y, make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/arm/lib/xor-neon.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240730-module_description_orphans-v1-0-7094088076c8@quicinc.com Link: https://lkml.kernel.org/r/20240730-module_description_orphans-v1-1-7094088076c8@quicinc.com Signed-off-by: Jeff Johnson Cc: Alexandre Torgue Cc: Alistar Popple Cc: Andrew Jeffery Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Boqun Feng Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Eddie James Cc: Greg Kroah-Hartman Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jeremy Kerr Cc: Joel Stanley Cc: Karol Herbst Cc: Masami Hiramatsu Cc: Maxime Coquelin Cc: Michael Ellerman (powerpc) Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Nouveau Cc: Pekka Paalanen Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Russell King Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/lib/xor-neon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index 522510baed49..cf57fca97908 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -8,6 +8,7 @@ #include #include +MODULE_DESCRIPTION("NEON accelerated XOR implementation"); MODULE_LICENSE("GPL"); #ifndef __ARM_NEON__ From fc5def2c2ad049588c875d86c7408537300ee43e Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 30 Jul 2024 07:43:19 -0700 Subject: [PATCH 037/103] x86/mm: add testmmiotrace MODULE_DESCRIPTION() Fix the following 'make W=1' warning: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/x86/mm/testmmiotrace.o Link: https://lkml.kernel.org/r/20240730-module_description_orphans-v1-2-7094088076c8@quicinc.com Signed-off-by: Jeff Johnson Cc: Alexandre Torgue Cc: Alistar Popple Cc: Andrew Jeffery Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Boqun Feng Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Eddie James Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jeremy Kerr Cc: Joel Stanley Cc: Karol Herbst Cc: Masami Hiramatsu Cc: Maxime Coquelin Cc: Michael Ellerman Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Nouveau Cc: Pekka Paalanen Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Russell King Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/x86/mm/testmmiotrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index bda73cb7a044..ae295659ca14 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -144,3 +144,4 @@ static void __exit cleanup(void) module_init(init); module_exit(cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module for mmiotrace"); From 588661fd87a79c89b506abdba186cb58c07a5dfc Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 30 Jul 2024 07:43:22 -0700 Subject: [PATCH 038/103] locking/ww_mutex/test: add MODULE_DESCRIPTION() Fix the 'make W=1' warning: WARNING: modpost: missing MODULE_DESCRIPTION() in kernel/locking/test-ww_mutex.o Link: https://lkml.kernel.org/r/20240730-module_description_orphans-v1-5-7094088076c8@quicinc.com Signed-off-by: Jeff Johnson Acked-by: Waiman Long Cc: Alexandre Torgue Cc: Alistar Popple Cc: Andrew Jeffery Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Boqun Feng Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Eddie James Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jeremy Kerr Cc: Joel Stanley Cc: Karol Herbst Cc: Masami Hiramatsu Cc: Maxime Coquelin Cc: Michael Ellerman Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Nouveau Cc: Pekka Paalanen Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Russell King Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Will Deacon Signed-off-by: Andrew Morton --- kernel/locking/test-ww_mutex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 78719e1ef1b1..10a5736a21c2 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -697,3 +697,4 @@ module_exit(test_ww_mutex_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("API test facility for ww_mutexes"); From 11ee88a0f98770719f29b7d1efb2a2ca6a83af3c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 29 Jul 2024 01:45:08 -0700 Subject: [PATCH 039/103] fault-injection: enhance failcmd to exit on non-hex address input The failcmd.sh script in the fault-injection toolkit does not currently validate whether the provided address is in hexadecimal format. This can lead to silent failures if the address is sourced from places like `/proc/kallsyms`, which omits the '0x' prefix, potentially causing users to operate under incorrect assumptions. Introduce a new function, `exit_if_not_hex`, which checks the format of the provided address and exits with an error message if the address is not a valid hexadecimal number. This enhancement prevents users from running the command with improperly formatted addresses, thus improving the robustness and usability of the failcmd tool. 
Link: https://lkml.kernel.org/r/20240729084512.3349928-1-leitao@debian.org Signed-off-by: Breno Leitao Reviewed-by: Akinobu Mita Signed-off-by: Andrew Morton --- tools/testing/fault-injection/failcmd.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh index 78dac34264be..c4f2432750f4 100644 --- a/tools/testing/fault-injection/failcmd.sh +++ b/tools/testing/fault-injection/failcmd.sh @@ -64,6 +64,14 @@ ENVIRONMENT EOF } +exit_if_not_hex() { + local value="$1" + if ! [[ $value =~ ^0x[0-9a-fA-F]+$ ]]; then + echo "Error: The provided value '$value' is not a valid hexadecimal number." >&2 + exit 1 + fi +} + if [ $UID != 0 ]; then echo must be run as root >&2 exit 1 @@ -160,18 +168,22 @@ while true; do shift 2 ;; --require-start) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/require-start shift 2 ;; --require-end) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/require-end shift 2 ;; --reject-start) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/reject-start shift 2 ;; --reject-end) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/reject-end shift 2 ;; From 8af2caf7307d8002d88c377c58eba8cae75a2109 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 29 Jul 2024 01:52:11 -0700 Subject: [PATCH 040/103] failcmd: make failcmd.sh executable Change the file permissions of tools/testing/fault-injection/failcmd.sh to allow execution. This ensures the script can be run directly without explicitly invoking a shell. 
Link: https://lkml.kernel.org/r/20240729085215.3403417-1-leitao@debian.org Signed-off-by: Breno Leitao Reviewed-by: Akinobu Mita Signed-off-by: Andrew Morton --- tools/testing/fault-injection/failcmd.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/fault-injection/failcmd.sh diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh old mode 100644 new mode 100755 From e0ba72e3a4422c4255fa80191a637d7c65ef4c59 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Tue, 23 Jul 2024 16:40:17 +0000 Subject: [PATCH 041/103] lockdep: upper limit LOCKDEP_CHAINS_BITS CONFIG_LOCKDEP_CHAINS_BITS value decides the size of chain_hlocks[] in kernel/locking/lockdep.c, and it is checked by add_chain_cache() with BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks)); This patch is just to silence BUILD_BUG_ON(). See also https://lore.kernel.org/all/30795.1620913191@jrobl/ [cmllamas@google.com: fix minor checkpatch issues in commit log] Link: https://lkml.kernel.org/r/20240723164018.2489615-1-cmllamas@google.com Signed-off-by: J. R. Okajima Signed-off-by: Carlos Llamas Acked-by: Tetsuo Handa Cc: Peter Zijlstra Cc: Boqun Feng Cc: Ingo Molnar Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index bf0995d328b3..a81d452941ce 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1515,7 +1515,7 @@ config LOCKDEP_BITS config LOCKDEP_CHAINS_BITS int "Bitsize for MAX_LOCKDEP_CHAINS" depends on LOCKDEP && !LOCKDEP_SMALL - range 10 30 + range 10 21 default 16 help Try increasing this value if you hit "BUG: MAX_LOCKDEP_CHAINS too low!" message. 
From 97cf8f5f93f8419d5e7902b89194530466e73bcd Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 2 Aug 2024 11:16:21 -0400 Subject: [PATCH 042/103] watchdog: handle the ENODEV failure case of lockup_detector_delay_init() separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When watchdog_hardlockup_probe() is being called by lockup_detector_delay_init(), an error return of -ENODEV will happen for the arm64 arch when arch_perf_nmi_is_available() returns false. This means that NMI is not usable by the hard lockup detector and so has to be disabled. This can be considered a deficiency in that particular arm64 chip, but there is nothing we can do about it. That also means the following error will always be reported when the kernel boot up. watchdog: Delayed init of the lockup detector failed: -19 The word "failed" itself has a connotation that there is something wrong with the kernel which is not really the case here. Handle this special ENODEV case separately and explain the reason behind disabling hard lockup detector without causing anxiety for those users who read the above message and wonder about it. 
Link: https://lkml.kernel.org/r/20240802151621.617244-1-longman@redhat.com Signed-off-by: Waiman Long Cc: Douglas Anderson Cc: Joel Granados Cc: Li Zhe Cc: Petr Mladek Cc: Thomas Weißschuh Signed-off-by: Andrew Morton --- kernel/watchdog.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 830a83895493..262691ba62b7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -1203,7 +1203,10 @@ static void __init lockup_detector_delay_init(struct work_struct *work) ret = watchdog_hardlockup_probe(); if (ret) { - pr_info("Delayed init of the lockup detector failed: %d\n", ret); + if (ret == -ENODEV) + pr_info("NMI not fully supported\n"); + else + pr_info("Delayed init of the lockup detector failed: %d\n", ret); pr_info("Hard watchdog permanently disabled\n"); return; } From a15bec6a8f2f177e6c1388f23d02436e27994299 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 6 Aug 2024 08:39:27 -0700 Subject: [PATCH 043/103] lib/rhashtable: cleanup fallback check in bucket_table_alloc() Upon allocation failure, the current check with the nofail bits is unnecessary, and further stands in the way of discouraging direct use of __GFP_NOFAIL. Remove this and replace with the proper way of determining if doing a non-blocking allocation for the nested table case. 
Link: https://lkml.kernel.org/r/20240806153927.184515-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Suggested-by: Michal Hocko Cc: Davidlohr Bueso Cc: Herbert Xu Signed-off-by: Andrew Morton --- lib/rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index dbbed19f8fff..6c902639728b 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -189,7 +189,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size = nbuckets; - if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) { + if (tbl == NULL && !gfpflags_allow_blocking(gfp)) { tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); nbuckets = 0; } From 6ce2082fd3a25d5a8c756120959237cace0379f1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 15:12:35 +0300 Subject: [PATCH 044/103] fault-inject: improve build for CONFIG_FAULT_INJECTION=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fault-inject.h users across the kernel need to add a lot of #ifdef CONFIG_FAULT_INJECTION to cater for shortcomings in the header. Make fault-inject.h self-contained for CONFIG_FAULT_INJECTION=n, and add stubs for DECLARE_FAULT_ATTR(), setup_fault_attr(), should_fail_ex(), and should_fail() to allow removal of conditional compilation. 
[akpm@linux-foundation.org: repair fallout from no longer including debugfs.h into fault-inject.h] [akpm@linux-foundation.org: fix drivers/misc/xilinx_tmr_inject.c] [akpm@linux-foundation.org: Add debugfs.h inclusion to more files, per Stephen] Link: https://lkml.kernel.org/r/20240813121237.2382534-1-jani.nikula@intel.com Fixes: 6ff1cb355e62 ("[PATCH] fault-injection capabilities infrastructure") Signed-off-by: Jani Nikula Cc: Akinobu Mita Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Rob Clark Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- drivers/gpu/drm/msm/msm_drv.c | 1 + drivers/iommu/iommufd/selftest.c | 1 + drivers/misc/xilinx_tmr_inject.c | 1 + drivers/nvme/host/fault_inject.c | 1 + drivers/ufs/core/ufs-fault-injection.c | 1 + include/linux/fault-inject.h | 36 +++++++++++++++++++++----- include/linux/mmc/host.h | 1 + include/ufs/ufshcd.h | 1 + kernel/futex/core.c | 1 + lib/fault-inject.c | 1 + mm/fail_page_alloc.c | 1 + mm/failslab.c | 1 + 12 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9c33f4e3f822..e018bc79e188 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 222cfc11ebfd..db4032feccee 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/xilinx_tmr_inject.c b/drivers/misc/xilinx_tmr_inject.c index 73c6da7d0963..734fdfac19ef 100644 --- a/drivers/misc/xilinx_tmr_inject.c +++ b/drivers/misc/xilinx_tmr_inject.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c index 
1d1b6441a339..105d6cb41c72 100644 --- a/drivers/nvme/host/fault_inject.c +++ b/drivers/nvme/host/fault_inject.c @@ -6,6 +6,7 @@ */ #include +#include #include "nvme.h" static DECLARE_FAULT_ATTR(fail_default_attr); diff --git a/drivers/ufs/core/ufs-fault-injection.c b/drivers/ufs/core/ufs-fault-injection.c index 169540417079..55db38e75cc4 100644 --- a/drivers/ufs/core/ufs-fault-injection.c +++ b/drivers/ufs/core/ufs-fault-injection.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "ufs-fault-injection.h" diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 354413950d34..8c829d28dcf3 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -2,13 +2,17 @@ #ifndef _LINUX_FAULT_INJECT_H #define _LINUX_FAULT_INJECT_H +#include +#include + +struct dentry; +struct kmem_cache; + #ifdef CONFIG_FAULT_INJECTION -#include -#include +#include #include #include -#include /* * For explanation of the elements of this struct, see @@ -51,6 +55,28 @@ int setup_fault_attr(struct fault_attr *attr, char *str); bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); +#else /* CONFIG_FAULT_INJECTION */ + +struct fault_attr { +}; + +#define DECLARE_FAULT_ATTR(name) struct fault_attr name = {} + +static inline int setup_fault_attr(struct fault_attr *attr, char *str) +{ + return 0; /* Note: 0 means error for __setup() handlers! 
*/ +} +static inline bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) +{ + return false; +} +static inline bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return false; +} + +#endif /* CONFIG_FAULT_INJECTION */ + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct dentry *fault_create_debugfs_attr(const char *name, @@ -87,10 +113,6 @@ static inline void fault_config_init(struct fault_config *config, #endif /* CONFIG_FAULT_INJECTION_CONFIGFS */ -#endif /* CONFIG_FAULT_INJECTION */ - -struct kmem_cache; - #ifdef CONFIG_FAIL_PAGE_ALLOC bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); #else diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 88c6a76042ee..49470188fca7 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 0fd2aebac728..3f68ae3e4330 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 06a1f091be81..136768ae2637 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/fault-inject.c b/lib/fault-inject.c index d608f9b48c10..52eb6ba29698 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/fail_page_alloc.c b/mm/fail_page_alloc.c index 532851ce5132..7647096170e9 100644 --- a/mm/fail_page_alloc.c +++ b/mm/fail_page_alloc.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include diff --git a/mm/failslab.c b/mm/failslab.c index af16c2ed578f..c3901b136498 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include 
+#include #include #include #include "slab.h" From f161cdd91b2a68ed846ecaac43b2f01af0ea61c8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 15:12:36 +0300 Subject: [PATCH 045/103] drm/msm: clean up fault injection usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the proper stubs in place in linux/fault-inject.h, we can remove a bunch of conditional compilation for CONFIG_FAULT_INJECTION=n. Link: https://lkml.kernel.org/r/20240813121237.2382534-2-jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Abhinav Kumar Cc: Akinobu Mita Cc: Rob Clark Cc: Dmitry Baryshkov Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Andrew Morton --- drivers/gpu/drm/msm/msm_debugfs.c | 2 -- drivers/gpu/drm/msm/msm_drv.c | 2 -- drivers/gpu/drm/msm/msm_drv.h | 4 ---- 3 files changed, 8 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 4494f6d1c7cb..7ab607252d18 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -357,12 +357,10 @@ void msm_debugfs_init(struct drm_minor *minor) if (priv->kms && priv->kms->funcs->debugfs_init) priv->kms->funcs->debugfs_init(priv->kms, minor); -#ifdef CONFIG_FAULT_INJECTION fault_create_debugfs_attr("fail_gem_alloc", minor->debugfs_root, &fail_gem_alloc); fault_create_debugfs_attr("fail_gem_iova", minor->debugfs_root, &fail_gem_iova); -#endif } #endif diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e018bc79e188..8c13b08708d2 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -59,10 +59,8 @@ static bool modeset = true; MODULE_PARM_DESC(modeset, "Use kernel modesetting [KMS] (1=on (default), 0=disable)"); module_param(modeset, bool, 0600); -#ifdef CONFIG_FAULT_INJECTION DECLARE_FAULT_ATTR(fail_gem_alloc); DECLARE_FAULT_ATTR(fail_gem_iova); -#endif static int 
msm_drm_uninit(struct device *dev) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index be016d7b4ef1..9b953860131b 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -33,12 +33,8 @@ #include #include -#ifdef CONFIG_FAULT_INJECTION extern struct fault_attr fail_gem_alloc; extern struct fault_attr fail_gem_iova; -#else -# define should_fail(attr, size) 0 -#endif struct msm_kms; struct msm_gpu; From ccbfd2df3018e2694f750abe5e93b647a4eef5b1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 13 Aug 2024 15:12:37 +0300 Subject: [PATCH 046/103] drm/xe: clean up fault injection usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the proper stubs in place in linux/fault-inject.h, we can remove a bunch of conditional compilation for CONFIG_FAULT_INJECTION=n. Link: https://lkml.kernel.org/r/20240813121237.2382534-3-jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Cc: Akinobu Mita Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Rob Clark Signed-off-by: Andrew Morton --- drivers/gpu/drm/xe/xe_debugfs.c | 7 +------ drivers/gpu/drm/xe/xe_gt.h | 10 ++-------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 1011e5d281fa..b381bfb634f7 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -6,6 +6,7 @@ #include "xe_debugfs.h" #include +#include #include #include @@ -26,10 +27,7 @@ #include "xe_vm.h" #endif -#ifdef CONFIG_FAULT_INJECTION -#include /* XXX: fault-inject.h is broken */ DECLARE_FAULT_ATTR(gt_reset_failure); -#endif static struct xe_device *node_to_xe(struct drm_info_node *node) { @@ -214,8 +212,5 @@ void xe_debugfs_register(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); -#ifdef CONFIG_FAULT_INJECTION 
fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); -#endif - } diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 8b1a5027dcf2..ee138e9768a2 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_H_ #define _XE_GT_H_ +#include + #include #include "xe_device.h" @@ -19,19 +21,11 @@ #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) -#ifdef CONFIG_FAULT_INJECTION -#include /* XXX: fault-inject.h is broken */ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { return should_fail(&gt_reset_failure, 1); } -#else -static inline bool xe_fault_inject_gt_reset(void) -{ - return false; -} -#endif struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_hwconfig(struct xe_gt *gt); From cbf164cd44e06c78938b4a4a4479d3541779c319 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 13 Aug 2024 01:02:29 +0800 Subject: [PATCH 047/103] lib/bcd: optimize _bin2bcd() for improved performance The original _bin2bcd() function used / 10 and % 10 operations for conversion. Although GCC optimizes these operations and does not generate division or modulus instructions, the new implementation reduces the number of mov instructions in the generated code for both x86-64 and ARM architectures. This optimization calculates the tens digit using (val * 103) >> 10, which is accurate for values of 'val' in the range [0, 178]. Given that the valid input range is [0, 99], this method ensures correctness while simplifying the generated code.
Link: https://lkml.kernel.org/r/20240812170229.229380-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/bcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/bcd.c b/lib/bcd.c index 7e4750b6e801..c5e79ba9cd7b 100644 --- a/lib/bcd.c +++ b/lib/bcd.c @@ -10,6 +10,8 @@ EXPORT_SYMBOL(_bcd2bin); unsigned char _bin2bcd(unsigned val) { - return ((val / 10) << 4) + val % 10; + const unsigned int t = (val * 103) >> 10; + + return (t << 4) | (val - t * 10); } EXPORT_SYMBOL(_bin2bcd); From 16d9691ad4b562ea19271f0788738f649c02cf3c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 14 Aug 2024 08:44:13 +0200 Subject: [PATCH 048/103] lib/percpu_counter: add missing __percpu qualifier to a cast Add missing __percpu qualifier to a (void *) cast to fix percpu_counter.c:212:36: warning: cast removes address space '__percpu' of expression percpu_counter.c:212:33: warning: incorrect type in assignment (different address spaces) percpu_counter.c:212:33: expected signed int [noderef] [usertype] __percpu *counters percpu_counter.c:212:33: got void * sparse warnings. Found by GCC's named address space checks. There were no changes in the resulting object file. 
Link: https://lkml.kernel.org/r/20240814064437.940162-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Signed-off-by: Andrew Morton --- lib/percpu_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 51bc5246986d..2891f94a11c6 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -209,7 +209,7 @@ int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, INIT_LIST_HEAD(&fbc[i].list); #endif fbc[i].count = amount; - fbc[i].counters = (void *)counters + (i * counter_size); + fbc[i].counters = (void __percpu *)counters + i * counter_size; debug_percpu_counter_activate(&fbc[i]); } From ef851d44a83ed625ec22eae6bd36a1348c8af571 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:05 +0900 Subject: [PATCH 049/103] nilfs2: add support for FS_IOC_GETUUID Patch series "nilfs2: add support for some common ioctls". This series adds support for common ioctls to nilfs2 for getting the volume UUID and the relative path of an FS instance within the sysfs namespace, and also implements ioctls for nilfs2 to get and set the volume label. This patch (of 2): Expose the UUID of a file system instance using the super_set_uuid helper and support the FS_IOC_GETUUID ioctl. 
Link: https://lkml.kernel.org/r/20240815074408.5550-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240815074408.5550-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e835e1f5a712..167050b3ce7e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1063,6 +1063,9 @@ nilfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { From 8d1dba2e7cc74381087ae8ef03673abee758fcd0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:06 +0900 Subject: [PATCH 050/103] nilfs2: add support for FS_IOC_GETFSSYSFSPATH Use the standard helper super_set_sysfs_name_bdev() to give the sysfs subpath of the filesystem for the FS_IOC_GETFSSYSFSPATH ioctl. For nilfs2, it will output "nilfs2/". Link: https://lkml.kernel.org/r/20240815074408.5550-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 167050b3ce7e..76e35e6773d1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1065,6 +1065,7 @@ nilfs_fill_super(struct super_block *sb, struct fs_context *fc) super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); From 4b901256a7bf6db3ca84ee2b2e87a1af4d40b8a3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:07 +0900 Subject: [PATCH 051/103] nilfs2: add support for FS_IOC_GETFSLABEL Implement support for FS_IOC_GETFSLABEL ioctl to read filesystem label. 
Link: https://lkml.kernel.org/r/20240815074408.5550-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 27 +++++++++++++++++++++++++++ fs/nilfs2/nilfs.h | 12 ++++++++++++ 2 files changed, 39 insertions(+) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 8be471ce4f19..b5c6a50d6d5d 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -17,6 +17,7 @@ #include /* mnt_want_write_file(), mnt_drop_write_file() */ #include #include +#include #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -1266,6 +1267,29 @@ out: return ret; } +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. + */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1308,6 +1332,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_set_alloc_range(inode, argp); case FITRIM: return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); default: return -ENOTTY; } @@ -1334,6 +1360,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: + case FS_IOC_GETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/nilfs.h 
b/fs/nilfs2/nilfs.h index 4017f7856440..3097490b6621 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -103,6 +103,18 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. + */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ From 79785f7801275bc070035e3982f8ff4b336a1ceb Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:08 +0900 Subject: [PATCH 052/103] nilfs2: add support for FS_IOC_SETFSLABEL Implement support for FS_IOC_SETFSLABEL ioctl to write filesystem label. Link: https://lkml.kernel.org/r/20240815074408.5550-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index b5c6a50d6d5d..297989e51ee6 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1290,6 +1290,68 @@ static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) return 0; } +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). 
+ * * %-EROFS - Read only file system. + */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1334,6 +1396,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_trim_fs(inode, argp); case FS_IOC_GETFSLABEL: return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -1361,6 +1425,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; From 299910dcb4525ac0274f3efa9527876315ba4f67 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 18:01:28 +0900 Subject: [PATCH 053/103] nilfs2: do not output 
warnings when clearing dirty buffers After detecting file system corruption and degrading to a read-only mount, dirty folios and buffers in the page cache are cleared, and a large number of warnings are output at that time, often filling up the kernel log. In this case, since the degrading to a read-only mount is output to the kernel log, these warnings are not very meaningful, and are rather a nuisance in system management and debugging. The related nilfs2-specific page/folio routines have a silent argument that suppresses the warning output, but since it is not currently used meaningfully, remove both the silent argument and the warning output. Link: https://lkml.kernel.org/r/20240816090128.4561-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 4 ++-- fs/nilfs2/mdt.c | 6 +++--- fs/nilfs2/page.c | 19 +++---------------- fs/nilfs2/page.h | 4 ++-- 4 files changed, 10 insertions(+), 23 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7340a01d80e1..c39bc940e6f2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -162,7 +162,7 @@ static int nilfs_writepages(struct address_space *mapping, int err = 0; if (sb_rdonly(inode->i_sb)) { - nilfs_clear_dirty_pages(mapping, false); + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -186,7 +186,7 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) * have dirty pages that try to be flushed in background. * So, here we simply discard this dirty page. */ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 4f792a0ad0f0..ceb7dc0b5bad 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -411,7 +411,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) * have dirty folios that try to be flushed in background. * So, here we simply discard this dirty folio. 
*/ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } @@ -638,10 +638,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_clear_dirty_pages(inode->i_mapping); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 14e470fb8870..7797903e014e 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -357,9 +357,8 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { struct folio_batch fbatch; unsigned int i; @@ -380,7 +379,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) * was acquired. Skip processing in that case. 
*/ if (likely(folio->mapping == mapping)) - nilfs_clear_folio_dirty(folio, silent); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); } @@ -392,20 +391,13 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) /** * nilfs_clear_folio_dirty - discard dirty folio * @folio: dirty folio that will be discarded - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_folio_dirty(struct folio *folio, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = folio->mapping->host; - struct super_block *sb = inode->i_sb; struct buffer_head *bh, *head; BUG_ON(!folio_test_locked(folio)); - if (!silent) - nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", - folio_pos(folio), inode->i_ino); - folio_clear_uptodate(folio); folio_clear_mappedtodisk(folio); @@ -419,11 +411,6 @@ void nilfs_clear_folio_dirty(struct folio *folio, bool silent) bh = head; do { lock_buffer(bh); - if (!silent) - nilfs_warn(sb, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); - set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 7e1a2c455a10..64521a03a19e 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -41,8 +41,8 @@ void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_folio_dirty(struct folio *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int, unsigned int); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, From b79bdfdd31a4ffe441ad347f4d869fde997bb69c Mon Sep 17 00:00:00 2001 From: Ryusuke 
Konishi Date: Fri, 16 Aug 2024 16:43:12 +0900 Subject: [PATCH 054/103] nilfs2: add missing argument description for __nilfs_error() Patch series "This series fixes a number of formatting issues in kernel doc comments" This series fixes a number of formatting issues in kernel doc comments that were detected as warnings by the kernel-doc script, making violations more noticeable when adding or modifying kernel doc. There are still warnings output by "kernel-doc -Wall", but they are widespread, so I plan to fix them at another time while considering priorities. This patch (of 8): Add missing argument description to __nilfs_error function and remove the following warnings from kernel-doc script output: fs/nilfs2/super.c:121: warning: Function parameter or struct member 'sb' not described in '__nilfs_error' fs/nilfs2/super.c:121: warning: Function parameter or struct member 'function' not described in '__nilfs_error' fs/nilfs2/super.c:121: warning: Function parameter or struct member 'fmt' not described in '__nilfs_error' Link: https://lkml.kernel.org/r/20240816074319.3253-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240816074319.3253-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 76e35e6773d1..8eb8dbc9f51c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -105,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb) /** * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. 
This function should be called when From 3e62c5d7d0a4e8fa826d6e2f8e19c805045edb82 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:13 +0900 Subject: [PATCH 055/103] nilfs2: add missing argument descriptions for ioctl-related helpers Add missing argument descriptions and return value information to the kernel-doc comments for ioctl helper functions, and eliminate the following warnings output by the kernel-doc script: fs/nilfs2/ioctl.c:120: warning: Function parameter or struct member 'dentry' not described in 'nilfs_fileattr_get' fs/nilfs2/ioctl.c:120: warning: Function parameter or struct member 'fa' not described in 'nilfs_fileattr_get' fs/nilfs2/ioctl.c:133: warning: Function parameter or struct member 'idmap' not described in 'nilfs_fileattr_set' fs/nilfs2/ioctl.c:133: warning: Function parameter or struct member 'dentry' not described in 'nilfs_fileattr_set' fs/nilfs2/ioctl.c:133: warning: Function parameter or struct member 'fa' not described in 'nilfs_fileattr_set' fs/nilfs2/ioctl.c:164: warning: Function parameter or struct member 'inode' not described in 'nilfs_ioctl_getversion' fs/nilfs2/ioctl.c:164: warning: Function parameter or struct member 'argp' not described in 'nilfs_ioctl_getversion' Link: https://lkml.kernel.org/r/20240816074319.3253-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 297989e51ee6..fa77f78df681 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -115,7 +115,11 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } /** - * nilfs_fileattr_get - ioctl to support lsattr + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. 
*/ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { @@ -127,7 +131,12 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } /** - * nilfs_fileattr_set - ioctl to support chattr + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) @@ -160,6 +169,10 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap, /** * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { From 60d8b01e55b2c45e73d442b379844d6efd9d16a7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:14 +0900 Subject: [PATCH 056/103] nilfs2: improve kernel-doc comments for b-tree node helpers Revise kernel-doc comments for helper functions related to changing the search key for b-tree node blocks, and eliminate the following warnings output by the kernel-doc script: fs/nilfs2/btnode.c:175: warning: Function parameter or struct member 'btnc' not described in 'nilfs_btnode_prepare_change_key' fs/nilfs2/btnode.c:175: warning: Function parameter or struct member 'ctxt' not described in 'nilfs_btnode_prepare_change_key' fs/nilfs2/btnode.c:238: warning: Function parameter or struct member 'btnc' not described in 'nilfs_btnode_commit_change_key' fs/nilfs2/btnode.c:238: warning: Function parameter or struct member 'ctxt' not described in 'nilfs_btnode_commit_change_key' fs/nilfs2/btnode.c:278: warning: Function parameter or struct member 'btnc' not described in 'nilfs_btnode_abort_change_key' fs/nilfs2/btnode.c:278: warning: Function parameter or struct 
member 'ctxt' not described in 'nilfs_btnode_abort_change_key' Link: https://lkml.kernel.org/r/20240816074319.3253-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/btnode.c | 63 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c034080c334b..57b4af5ad646 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -179,11 +179,32 @@ void nilfs_btnode_delete(struct buffer_head *bh) } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. 
+ * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -245,8 +266,21 @@ retry: } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -285,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). 
+ * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) From 89a6c1775089eae99940d4a86f2ba34cbe848726 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:15 +0900 Subject: [PATCH 057/103] nilfs2: fix incorrect kernel-doc declaration of nilfs_palloc_req structure The "struct" keyword is missing from the kernel-doc comment of the nilfs_palloc_req structure, so add it to eliminate the following warning output by the kernel-doc script: fs/nilfs2/alloc.h:46: warning: cannot understand function prototype: 'struct nilfs_palloc_req ' Link: https://lkml.kernel.org/r/20240816074319.3253-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index d825a9faca6d..e19d7eb10084 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -37,7 +37,7 @@ void *nilfs_palloc_block_get_entry(const struct inode *, __u64, int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator request and reply * 
@pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap From 0e13ddee285ffa0815fa66e1eac4bf0fafd06ce4 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:16 +0900 Subject: [PATCH 058/103] nilfs2: add missing description of nilfs_btree_path structure Add missing kernel-doc comment for the 'bp_ctxt' member variable of the nilfs_btree_path structure, and eliminate the following warning output by the kernel-doc script: fs/nilfs2/btree.h:39: warning: Function parameter or struct member 'bp_ctxt' not described in 'nilfs_btree_path' Link: https://lkml.kernel.org/r/20240816074319.3253-6-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/btree.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 92868e1a48ca..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -24,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { From d9e5551ea101203151077c42af0bebeb6825f636 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:17 +0900 Subject: [PATCH 059/103] nilfs2: describe the members of nilfs_bmap_operations structure Add missing member variable descriptions in the kernel-doc comments for the nilfs_bmap_operations structure, hiding the internal operations with the "private:" tag.
This eliminates the following warnings output by the kernel-doc script: fs/nilfs2/bmap.h:74: warning: Function parameter or struct member 'bop_lookup' not described in 'nilfs_bmap_operations' fs/nilfs2/bmap.h:74: warning: Function parameter or struct member 'bop_lookup_contig' not described in 'nilfs_bmap_operations' ... fs/nilfs2/bmap.h:74: warning: Function parameter or struct member 'bop_gather_data' not described in 'nilfs_bmap_operations' Link: https://lkml.kernel.org/r/20240816074319.3253-7-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/bmap.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 608168a5cb88..5f8c8c552620 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -44,6 +44,19 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); @@ -66,7 +79,7 @@ struct nilfs_bmap_operations { int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); - /* The following functions are internal use only. 
*/ + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); From 7876bc1bd6e89723edd1cb68f7d7bd83568ce82b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:18 +0900 Subject: [PATCH 060/103] nilfs2: fix inconsistencies in kernel-doc comments in segment.h Fix incorrect or missing variable names in the member variable descriptions in the nilfs_recovery_info and nilfs_sc_info structures, thereby eliminating the following warnings output by the kernel-doc script: fs/nilfs2/segment.h:49: warning: Function parameter or struct member 'ri_cno' not described in 'nilfs_recovery_info' fs/nilfs2/segment.h:49: warning: Function parameter or struct member 'ri_lsegs_start_seq' not described in 'nilfs_recovery_info' fs/nilfs2/segment.h:49: warning: Excess struct member 'ri_ri_cno' description in 'nilfs_recovery_info' fs/nilfs2/segment.h:49: warning: Excess struct member 'ri_lseg_start_seq' description in 'nilfs_recovery_info' fs/nilfs2/segment.h:177: warning: Function parameter or struct member 'sc_seq_accepted' not described in 'nilfs_sc_info' fs/nilfs2/segment.h:177: warning: Function parameter or struct member 'sc_timer_task' not described in 'nilfs_sc_info' fs/nilfs2/segment.h:177: warning: Excess struct member 'sc_seq_accept' description in 'nilfs_sc_info' Link: https://lkml.kernel.org/r/20240816074319.3253-8-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060f72ebf5a..2499721ebcc9 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -22,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super 
root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * @ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -107,7 +107,7 @@ struct nilfs_segsum_pointer { * @sc_wait_daemon: Daemon wait queue * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -115,6 +115,7 @@ struct nilfs_segsum_pointer { * @sc_lseg_stime: Start time of the latest logical segment * @sc_watermark: Watermark for the number of dirty buffers * @sc_timer: Timer for segctord + * @sc_timer_task: Thread woken up by @sc_timer * @sc_task: current thread of segctord */ struct nilfs_sc_info { From caaab56609ce48076af7361163b6a8f7f14d53b3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:19 +0900 Subject: [PATCH 061/103] nilfs2: fix missing initial short descriptions of kernel-doc comments Update some kernel-doc comments that are missing the initial short description and fix the following warnings output by the kernel-doc script: fs/nilfs2/bmap.c:353: warning: missing initial short description on line: * nilfs_bmap_lookup_dirty_buffers - fs/nilfs2/cpfile.c:708: warning: missing initial short description on line: * nilfs_cpfile_delete_checkpoint - fs/nilfs2/cpfile.c:972: warning: missing initial short description on line: * nilfs_cpfile_is_snapshot - fs/nilfs2/dat.c:275: warning: missing 
initial short description on line: * nilfs_dat_mark_dirty - fs/nilfs2/sufile.c:844: warning: missing initial short description on line: * nilfs_sufile_get_suinfo - Link: https://lkml.kernel.org/r/20240816074319.3253-9-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/bmap.c | 2 +- fs/nilfs2/cpfile.c | 32 ++++++++++++++++---------------- fs/nilfs2/dat.c | 17 +++++++---------- fs/nilfs2/sufile.c | 20 ++++++++------------ 4 files changed, 32 insertions(+), 39 deletions(-) diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index cd14ea25968c..c9e8d9a7d820 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -349,7 +349,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 69a5cced1e84..9c8d531cffa7 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -704,9 +704,15 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to delete + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -968,21 +974,15 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number + * @cno: checkpoint number * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fc1caf63a42a..0bef662176a4 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -271,18 +271,15 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 6748218be7c5..7bfc0860acee 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -840,21 +840,17 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * Description: - * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) From c91c6062d6cd1bc366efb04973ee449c30398a49 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Mon, 12 Aug 2024 09:46:51 +0530 Subject: [PATCH 062/103] Document/kexec: generalize crash hotplug description Commit 79365026f869 ("crash: add a new kexec flag for hotplug support") generalizes the crash hotplug support to allow architectures to update multiple kexec segments on CPU/Memory hotplug and not just elfcorehdr. Therefore, update the relevant kernel documentation to reflect the same. No functional change. 
Link: https://lkml.kernel.org/r/20240812041651.703156-1-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Reviewed-by: Petr Tesarik Acked-by: Baoquan He Cc: Hari Bathini Cc: Petr Tesarik Cc: Sourabh Jain Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- .../ABI/testing/sysfs-devices-memory | 6 ++-- .../ABI/testing/sysfs-devices-system-cpu | 6 ++-- .../admin-guide/mm/memory-hotplug.rst | 5 +-- Documentation/core-api/cpu_hotplug.rst | 10 +++--- kernel/crash_core.c | 33 +++++++++++-------- 5 files changed, 35 insertions(+), 25 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index a95e0f17c35a..cec65827e602 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -115,6 +115,6 @@ What: /sys/devices/system/memory/crash_hotplug Date: Aug 2023 Contact: Linux kernel mailing list Description: - (RO) indicates whether or not the kernel directly supports - modifying the crash elfcorehdr for memory hot un/plug and/or - on/offline changes. + (RO) indicates whether or not the kernel updates relevant kexec + segments on memory hot un/plug and/or on/offline events, avoiding the + need to reload kdump kernel. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index de725ca3be82..206079d3bd5b 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -704,9 +704,9 @@ What: /sys/devices/system/cpu/crash_hotplug Date: Aug 2023 Contact: Linux kernel mailing list Description: - (RO) indicates whether or not the kernel directly supports - modifying the crash elfcorehdr for CPU hot un/plug and/or - on/offline changes. + (RO) indicates whether or not the kernel updates relevant kexec + segments on CPU hot un/plug and/or on/offline events, avoiding the + need to reload kdump kernel.
What: /sys/devices/system/cpu/enabled Date: Nov 2022 diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index 098f14d83e99..cb2c080f400c 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -294,8 +294,9 @@ The following files are currently defined: ``crash_hotplug`` read-only: when changes to the system memory map occur due to hot un/plug of memory, this file contains '1' if the kernel updates the kdump capture kernel memory - map itself (via elfcorehdr), or '0' if userspace must update - the kdump capture kernel memory map. + map itself (via elfcorehdr and other relevant kexec + segments), or '0' if userspace must update the kdump + capture kernel memory map. Availability depends on the CONFIG_MEMORY_HOTPLUG kernel configuration option. diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index dcb0e379e5e8..a21dbf261be7 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -737,8 +737,9 @@ can process the event further. When changes to the CPUs in the system occur, the sysfs file /sys/devices/system/cpu/crash_hotplug contains '1' if the kernel -updates the kdump capture kernel list of CPUs itself (via elfcorehdr), -or '0' if userspace must update the kdump capture kernel list of CPUs. +updates the kdump capture kernel list of CPUs itself (via elfcorehdr and +other relevant kexec segments), or '0' if userspace must update the kdump +capture kernel list of CPUs. The availability depends on the CONFIG_HOTPLUG_CPU kernel configuration option.
@@ -750,8 +751,9 @@ file can be used in a udev rule as follows: SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" For a CPU hot un/plug event, if the architecture supports kernel updates -of the elfcorehdr (which contains the list of CPUs), then the rule skips -the unload-then-reload of the kdump capture kernel. +of the elfcorehdr (which contains the list of CPUs) and other relevant +kexec segments, then the rule skips the unload-then-reload of the kdump +capture kernel. Kernel Inline Documentations Reference ====================================== diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 63cf89393c6e..c1048893f4b6 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -505,7 +505,7 @@ int crash_check_hotplug_support(void) crash_hotplug_lock(); /* Obtain lock while reading crash information */ if (!kexec_trylock()) { - pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); + pr_info("kexec_trylock() failed, kdump image may be inaccurate\n"); crash_hotplug_unlock(); return 0; } @@ -520,18 +520,25 @@ int crash_check_hotplug_support(void) } /* - * To accurately reflect hot un/plug changes of cpu and memory resources - * (including onling and offlining of those resources), the elfcorehdr - * (which is passed to the crash kernel via the elfcorehdr= parameter) - * must be updated with the new list of CPUs and memories. + * To accurately reflect hot un/plug changes of CPU and Memory resources + * (including onlining and offlining of those resources), the relevant + * kexec segments must be updated with the latest CPU and Memory resources. * - * In order to make changes to elfcorehdr, two conditions are needed: - * First, the segment containing the elfcorehdr must be large enough - * to permit a growing number of resources; the elfcorehdr memory size - * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
- * Second, purgatory must explicitly exclude the elfcorehdr from the - * list of segments it checks (since the elfcorehdr changes and thus - * would require an update to purgatory itself to update the digest). + * Architectures must ensure two things for all segments that need + * updating during hotplug events: + * + * 1. Segments must be large enough to accommodate a growing number of + * resources. + * 2. Exclude the segments from SHA verification. + * + * For example, on most architectures, the elfcorehdr (which is passed + * to the crash kernel via the elfcorehdr= parameter) must include the + * new list of CPUs and memory. To make changes to the elfcorehdr, it + * should be large enough to permit a growing number of CPU and Memory + * resources. One can estimate the elfcorehdr memory size based on + * NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. The elfcorehdr is + * excluded from SHA verification by default if the architecture + * supports crash hotplug. */ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg) { @@ -540,7 +547,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu, crash_hotplug_lock(); /* Obtain lock while changing crash information */ if (!kexec_trylock()) { - pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); + pr_info("kexec_trylock() failed, kdump image may be inaccurate\n"); crash_hotplug_unlock(); return; } From e60255f07c6a1f5bbbd490a2db23fe9eee8c6fd4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 11 Aug 2024 03:53:16 +0800 Subject: [PATCH 063/103] ocfs2: remove custom swap functions in favor of built-in sort swap The custom swap functions used in ocfs2 do not perform any special operations and can be replaced with the built-in swap function of sort. This change not only reduces code size but also improves efficiency, especially in scenarios where CONFIG_RETPOLINE is enabled, as it makes indirect function calls more expensive. 
By using the built-in swap, we avoid these costly indirect function calls, leading to better performance. Link: https://lkml.kernel.org/r/20240810195316.186504-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Ching-Chun (Jim) Huang Cc: Joel Becker Cc: Mark Fasheh Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Junxiao Bi Signed-off-by: Andrew Morton --- fs/ocfs2/dir.c | 12 +----------- fs/ocfs2/refcounttree.c | 13 +++---------- fs/ocfs2/xattr.c | 15 +++------------ 3 files changed, 7 insertions(+), 33 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f0beb173dbba..fa5d0819a997 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3511,16 +3511,6 @@ static int dx_leaf_sort_cmp(const void *a, const void *b) return 0; } -static void dx_leaf_sort_swap(void *a, void *b, int size) -{ - struct ocfs2_dx_entry *entry1 = a; - struct ocfs2_dx_entry *entry2 = b; - - BUG_ON(size != sizeof(*entry1)); - - swap(*entry1, *entry2); -} - static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) { struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; @@ -3781,7 +3771,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, */ sort(dx_leaf->dl_list.de_entries, num_used, sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, - dx_leaf_sort_swap); + NULL); ocfs2_journal_dirty(handle, dx_leaf_bh); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 1f303b1adf1a..4f85508538fc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -1392,13 +1392,6 @@ static int cmp_refcount_rec_by_cpos(const void *a, const void *b) return 0; } -static void swap_refcount_rec(void *a, void *b, int size) -{ - struct ocfs2_refcount_rec *l = a, *r = b; - - swap(*l, *r); -} - /* * The refcount cpos are ordered by their 64bit cpos, * But we will use the low 32 bit to be the e_cpos in the b-tree. 
@@ -1474,7 +1467,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, */ sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), - cmp_refcount_rec_by_low_cpos, swap_refcount_rec); + cmp_refcount_rec_by_low_cpos, NULL); ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); if (ret) { @@ -1499,11 +1492,11 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), - cmp_refcount_rec_by_cpos, swap_refcount_rec); + cmp_refcount_rec_by_cpos, NULL); sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), sizeof(struct ocfs2_refcount_rec), - cmp_refcount_rec_by_cpos, swap_refcount_rec); + cmp_refcount_rec_by_cpos, NULL); *split_cpos = cpos; return 0; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 35c0cc2a51af..0e58a5ce539e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -4167,15 +4167,6 @@ static int cmp_xe(const void *a, const void *b) return 0; } -static void swap_xe(void *a, void *b, int size) -{ - struct ocfs2_xattr_entry *l = a, *r = b, tmp; - - tmp = *l; - memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); - memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); -} - /* * When the ocfs2_xattr_block is filled up, new bucket will be created * and all the xattr entries will be moved to the new bucket. @@ -4241,7 +4232,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), - cmp_xe, swap_xe); + cmp_xe, NULL); } /* @@ -4436,7 +4427,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, */ sort(entries, le16_to_cpu(xh->xh_count), sizeof(struct ocfs2_xattr_entry), - cmp_xe_offset, swap_xe); + cmp_xe_offset, NULL); /* Move all name/values to the end of the bucket. 
*/ xe = xh->xh_entries; @@ -4478,7 +4469,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, /* sort the entries by their name_hash. */ sort(entries, le16_to_cpu(xh->xh_count), sizeof(struct ocfs2_xattr_entry), - cmp_xe, swap_xe); + cmp_xe, NULL); buf = bucket_buf; for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) From 03222db82a3a0db43cbad00886c800819fdc59f3 Mon Sep 17 00:00:00 2001 From: Chi Zhiling Date: Thu, 15 Aug 2024 17:21:41 +0800 Subject: [PATCH 064/103] ocfs2: fix unexpected zeroing of virtual disk In a guest virtual machine, we found that there is unexpected data zeroing problem detected occasionally: XFS (vdb): Mounting V5 Filesystem XFS (vdb): Ending clean mount XFS (vdb): Metadata CRC error detected at xfs_refcountbt_read_verify+0x2c/0xf0, xfs_refcountbt block 0x200028 XFS (vdb): Unmount and run xfs_repair XFS (vdb): First 128 bytes of corrupted metadata buffer: 00000000e0cd2f5e: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000cafd57f5: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000d0298d7d: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000f0698484: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000adb789a7: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 000000005292b878: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000885b4700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000fd4b4df7: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (vdb): metadata I/O error in "xfs_trans_read_buf_map" at daddr 0x200028 len 8 error 74 XFS (vdb): Error -117 recovering leftover CoW allocations. XFS (vdb): xfs_do_force_shutdown(0x8) called from line 994 of file fs/xfs/xfs_mount.c. Return address = 000000003a53523a XFS (vdb): Corruption of in-memory data detected. 
Shutting down filesystem XFS (vdb): Please umount the filesystem and rectify the problem(s) It turns out that the root cause is from the physical host machine. More specifically, it is caused by the ocfs2. when the page_size is 64k, the block should advance by 16 each time instead of 1. This will lead to a wrong mapping from the page to the disk, which will zero some adjacent part of the disk. Link: https://lkml.kernel.org/r/20240815092141.1223238-1-chizhiling@163.com Signed-off-by: Chi Zhiling Suggested-by: Shida Zhang Reviewed-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 6be175a1ab3c..0364f82befda 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1187,7 +1187,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, /* This is the direct io target page. */ if (wc->w_pages[i] == NULL) { - p_blkno++; + p_blkno += (1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits)); continue; } From 076979ee62f23c0eff035e0528b4cfadbe743255 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 17:50:25 -0400 Subject: [PATCH 065/103] scripts/decode_stacktrace.sh: nix-ify nix only puts /usr/bin/env at the standard location (as required by posix), so shebangs have to be tweaked. 
Link: https://lkml.kernel.org/r/20240817215025.161628-1-kent.overstreet@linux.dev Signed-off-by: Kent Overstreet Cc: Bjorn Andersson Cc: Elliot Berman Cc: Xiong Nandi Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index a0f50a5b4f7c..ed9f914334cc 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0 # (c) 2014, Sasha Levin #set -x From d994c238347d7ba4de15da00985e1bea75e91dc7 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 17 Aug 2024 14:37:54 +0200 Subject: [PATCH 066/103] ratelimit: convert flags to int to save 8 bytes in size Only bit 1 is used, making an unsigned long a total overkill. This brings it from 40 to 32 bytes, which in turn shrinks user_struct from 136 to 128 bytes. Since the latter is allocated with hwalign, this means the total usage goes down from 192 to 128 bytes per object. No functional changes. 
Link: https://lkml.kernel.org/r/20240817123754.240924-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Signed-off-by: Andrew Morton --- include/linux/ratelimit_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 002266693e50..765232ce0b5e 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -19,8 +19,8 @@ struct ratelimit_state { int burst; int printed; int missed; + unsigned int flags; unsigned long begin; - unsigned long flags; }; #define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ From 7f86b2942791012ac7b4c481d1f84a58fd2fbcfc Mon Sep 17 00:00:00 2001 From: qasdev Date: Tue, 20 Aug 2024 02:22:09 +0100 Subject: [PATCH 067/103] ocfs2: fix shift-out-of-bounds UBSAN bug in ocfs2_verify_volume() This patch addresses a shift-out-of-bounds error in the ocfs2_verify_volume() function, identified by UBSAN. The bug was triggered by an invalid s_clustersize_bits value (e.g., 1548), which caused the expression "1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits)" to exceed the limits of a 32-bit integer, leading to an out-of-bounds shift. 
Link: https://lkml.kernel.org/r/ZsPvwQAXd5R/jNY+@hostname Signed-off-by: Qasim Ijaz Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=f3fff775402751ebb471 Tested-by: syzbot Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index afee70125ae3..b704983b2112 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2357,8 +2357,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, (unsigned long long)bh->b_blocknr); } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { - mlog(ML_ERROR, "bad cluster size found: %u\n", - 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits)); + mlog(ML_ERROR, "bad cluster size bit found: %u\n", + le32_to_cpu(di->id2.i_super.s_clustersize_bits)); } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) { mlog(ML_ERROR, "bad root_blkno: 0\n"); } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) { From 105ae044d6f3d7f8136d1ddac4c708595f643562 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 20 Aug 2024 04:16:07 +0200 Subject: [PATCH 068/103] ocfs2: use max() to improve ocfs2_dlm_seq_show() Use the max() macro to simplify the ocfs2_dlm_seq_show() function and improve its readability. 
Link: https://lkml.kernel.org/r/20240820021605.97887-3-thorsten.blum@toblux.com Signed-off-by: Thorsten Blum Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dlmglue.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index da78a04d6f0b..60df52e4c1f8 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3151,11 +3151,8 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) #ifdef CONFIG_OCFS2_FS_STATS if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) { now = ktime_to_us(ktime_get_real()); - if (lockres->l_lock_prmode.ls_last > - lockres->l_lock_exmode.ls_last) - last = lockres->l_lock_prmode.ls_last; - else - last = lockres->l_lock_exmode.ls_last; + last = max(lockres->l_lock_prmode.ls_last, + lockres->l_lock_exmode.ls_last); /* * Use d_filter_secs field to filter lock resources dump, * the default d_filter_secs(0) value filters nothing, From 62e6e7841701619c2390e4e6cc4089f38c2a6798 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:23 +0900 Subject: [PATCH 069/103] nilfs2: treat missing sufile header block as metadata corruption Patch series "nilfs2: prevent unexpected ENOENT propagation". This series fixes potential issues where the result code -ENOENT, which is returned internally when a metadata file operation encounters a hole block, is exposed to user space without being properly handled. Several issues with the same cause leading to hangs or WARN_ON check failures have been reported by syzbot and fixed each time in the past. This collectively fixes the missing -ENOENT conversions that do not cause stability issues and are not covered by syzbot. 
This patch (of 5): The sufile, a metadata file that holds metadata for segment management, has statistical information in its first block, but if reading this block fails, it receives the internal code -ENOENT and returns it unchanged to the callers. To prevent this -ENOENT from being propagated to system calls, if reading the header block fails, return -EIO (or -EINVAL depending on the context) instead. Link: https://lkml.kernel.org/r/20240821154627.11848-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240821154627.11848-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/sufile.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 7bfc0860acee..f071eba48163 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -79,10 +79,17 @@ nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -1237,9 +1244,15 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); kaddr = kmap_local_page(header_bh->b_page); From d07d8ba4cee7b56aa8ff499776ce76323562660e Mon Sep 
17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:24 +0900 Subject: [PATCH 070/103] nilfs2: treat missing cpfile header block as metadata corruption The cpfile, a metadata file that holds metadata for checkpoint management, also has statistical information in its first block, and if reading this block fails, it receives the internal code -ENOENT and returns that code to the callers. As with sufile, to prevent this -ENOENT from being propagated to system calls, return -EIO instead when reading the header block fails. Link: https://lkml.kernel.org/r/20240821154627.11848-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/cpfile.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 9c8d531cffa7..f0ce37552446 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -125,10 +125,17 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -283,14 +290,9 @@ int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (unlikely(ret < 0)) { - if (ret == -ENOENT) { - nilfs_error(cpfile->i_sb, - "checkpoint creation failed due to metadata corruption."); - ret = -EIO; - } + if (unlikely(ret < 0)) goto out_sem; - } + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); if 
(unlikely(ret < 0)) goto out_header; From 5b527d38644686dc11e29468463aa7affa282e31 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:25 +0900 Subject: [PATCH 071/103] nilfs2: do not propagate ENOENT error from sufile during recovery nilfs_sufile_free() returns the error code -ENOENT when the block where the segment usage should be placed does not exist (hole block case), but this error should not be propagated upwards to the mount system call. In nilfs_prepare_segment_for_recovery(), one of the recovery steps during mount, nilfs_sufile_free() is used and may return -ENOENT as is, so in that case return -EINVAL instead. Link: https://lkml.kernel.org/r/20240821154627.11848-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/recovery.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index b638dc06df2f..fe3a5a767700 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -433,8 +433,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. */ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); From 0b9aad46c1634527c6a9f951f72c31be67f9b25c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:26 +0900 Subject: [PATCH 072/103] nilfs2: do not propagate ENOENT error from sufile during GC nilfs_sufile_freev(), which is used to free segments in GC, aborts with -ENOENT if the target segment usage is on a hole block. 
This error only occurs if one of the segment numbers to be freed passed by the GC ioctl is invalid, so return -EINVAL instead. To avoid impairing readability, introduce a wrapper function that encapsulates error handling including the error code conversion (and error message output). Link: https://lkml.kernel.org/r/20240821154627.11848-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 64 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0ca3110d6386..2a771e222d86 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1102,12 +1102,64 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
+ */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. + * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; switch (nilfs_sc_cstage_get(sci)) { @@ -1201,14 +1253,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) nilfs_sc_cstage_inc(sci); fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, From d18e4233d88b1ed95016a465ad5751629f9d70b9 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:27 +0900 Subject: [PATCH 073/103] nilfs2: do not propagate ENOENT error from nilfs_sufile_mark_dirty() nilfs_sufile_mark_dirty(), which marks a block in the 
sufile metadata file as dirty in preparation for log writing, returns -ENOENT to the caller if the block containing the segment usage of the specified segment is missing. This internal code can propagate through the log writer to system calls such as fsync. To prevent this, treat this case as a filesystem error and return -EIO instead. Link: https://lkml.kernel.org/r/20240821154627.11848-6-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/sufile.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index f071eba48163..eea5a6a12f7b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -513,8 +513,15 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (ret) + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } goto out_sem; + } kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); From fb54ea1ee84534cab6a15515c73a0811bdcbc973 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2024 18:51:04 +0300 Subject: [PATCH 074/103] dimlib: use *-y instead of *-objs in Makefile *-objs suffix is reserved rather for (user-space) host programs while usually *-y suffix is used for kernel drivers (although *-objs works for that purpose for now). Let's correct the old usages of *-objs in Makefiles. 
Link: https://lkml.kernel.org/r/20240821155140.611514-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Florian Fainelli Reviewed-by: Alexander Lobakin Cc: Rasmus Villemoes Cc: Tal Gilboa Signed-off-by: Andrew Morton --- lib/dim/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dim/Makefile b/lib/dim/Makefile index c4cc4026c451..5b9bfaac7ac1 100644 --- a/lib/dim/Makefile +++ b/lib/dim/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_DIMLIB) += dimlib.o -dimlib-objs := dim.o net_dim.o rdma_dim.o +dimlib-y := dim.o net_dim.o rdma_dim.o From 38676d9e33133c0c39951b812b19cc5b9ff1978a Mon Sep 17 00:00:00 2001 From: Yang Ruibin <11162571@vivo.com> Date: Wed, 21 Aug 2024 03:34:40 -0400 Subject: [PATCH 075/103] lib: fix the NULL vs IS_ERR() bug for debugfs_create_dir() debugfs_create_dir() returns error pointers. It never returns NULL. So use IS_ERR() to check it. Link: https://lkml.kernel.org/r/20240821073441.9701-1-11162571@vivo.com Signed-off-by: Yang Ruibin <11162571@vivo.com> Signed-off-by: Andrew Morton --- lib/test_fpu_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_fpu_glue.c b/lib/test_fpu_glue.c index 074f30301f29..c0596426370a 100644 --- a/lib/test_fpu_glue.c +++ b/lib/test_fpu_glue.c @@ -42,7 +42,7 @@ static int __init test_fpu_init(void) return -EINVAL; selftest_dir = debugfs_create_dir("selftest_helpers", NULL); - if (!selftest_dir) + if (IS_ERR(selftest_dir)) return -ENOMEM; debugfs_create_file_unsafe("test_fpu", 0444, selftest_dir, NULL, From 0f69dc295b681753ac3455705357e600bc9c7745 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 23 Aug 2024 10:27:42 +0200 Subject: [PATCH 076/103] scripts/decode_stacktrace.sh: remove find_module recursion and improve error reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "scripts/decode_stacktrace.sh: improve error reporting and usability", v2. 
This small series improves usability of scripts/decode_stacktrace.sh by improving the usage text and correctly reporting when modules are built without debugging symbols. This patch (of 3): The find_module() function can fail for two reasons: * the module was not found * the module was found but without debugging info In both cases the user is reported the same error: WARNING! Modules path isn't set, but is needed to parse this symbol This is misleading in case the modules path is set correctly. find_module() is currently implemented as a recursive function based on global variables in order to check up to 4 different paths. This is not straightforward to read and even less to modify. Besides, the debuginfo code at the beginning of find_module() is executed identically every time the function is entered, i.e. up to 4 times per each module search due to recursion. To be able to improve error reporting, first rewrite the find_module() function to remove recursion. The new version of the function iterates over all the same (up to 4) paths as before and for each of them does the same checks as before. At the end of the iteration it is now able to print an appropriate error message, so that has been moved from the caller into find_module(). Finally, when the module is found but without debugging info, mention the two Kconfig variables one needs to set in order to have the needed debugging symbols. 
Link: https://lkml.kernel.org/r/20240823-decode_stacktrace-find_module-improvements-v2-0-d7a57d35558b@bootlin.com Link: https://lkml.kernel.org/r/20240823-decode_stacktrace-find_module-improvements-v2-1-d7a57d35558b@bootlin.com Signed-off-by: Luca Ceresoli Reviewed-by: Stephen Boyd Cc: Alexis Lothoré (eBPF Foundation) Cc: Konstantin Khlebnikov Cc: Luca Ceresoli Cc: Sasha Levin Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 40 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index ed9f914334cc..e6b38ab7c8c5 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -89,31 +89,32 @@ find_module() { fi fi - if [[ "$modpath" != "" ]] ; then - for fn in $(find "$modpath" -name "${module//_/[-_]}.ko*") ; do - if ${READELF} -WS "$fn" | grep -qwF .debug_line ; then - echo $fn - return - fi - done - return 1 - fi - - modpath=$(dirname "$vmlinux") - find_module && return - - if [[ $release == "" ]] ; then + if [ -z $release ] ; then release=$(gdb -ex 'print init_uts_ns.name.release' -ex 'quit' -quiet -batch "$vmlinux" 2>/dev/null | sed -n 's/\$1 = "\(.*\)".*/\1/p') fi + if [ -n "${release}" ] ; then + release_dirs="/usr/lib/debug/lib/modules/$release /lib/modules/$release" + fi - for dn in {/usr/lib/debug,}/lib/modules/$release ; do - if [ -e "$dn" ] ; then - modpath="$dn" - find_module && return + found_without_debug_info=false + for dir in "$modpath" "$(dirname "$vmlinux")" ${release_dirs}; do + if [ -n "${dir}" ] && [ -e "${dir}" ]; then + for fn in $(find "$dir" -name "${module//_/[-_]}.ko*") ; do + if ${READELF} -WS "$fn" | grep -qwF .debug_line ; then + echo $fn + return + fi + found_without_debug_info=true + done fi done - modpath="" + if [[ ${found_without_debug_info} == true ]]; then + echo "WARNING! 
No debugging info in module ${module}, rebuild with DEBUG_KERNEL and DEBUG_INFO" >&2 + else + echo "WARNING! Cannot find .ko for module ${module}, please pass a valid module path" >&2 + fi + return 1 } @@ -131,7 +132,6 @@ parse_symbol() { else local objfile=$(find_module) if [[ $objfile == "" ]] ; then - echo "WARNING! Modules path isn't set, but is needed to parse this symbol" >&2 return fi if [[ $aarray_support == true ]]; then From a6d05e826d48cdffe11d9b73cf386840c19129d4 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 23 Aug 2024 10:27:43 +0200 Subject: [PATCH 077/103] scripts/decode_stacktrace.sh: clarify command line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The syntax as expressed by usage() is not entirely correct: "" cannot be passed without "|auto". Additionally human reading of this syntax can be subject to misunderstanding due the mixture of '|' and '[]'. Improve readability in various ways: * rewrite using two lines for the two allowed usages * add square brackets around "" as it is optional when using debuginfod-find * move "" to inside the square brackets of the 2nd positional parameter * use underscores instead of spaces in <...> strings Link: https://lkml.kernel.org/r/20240823-decode_stacktrace-find_module-improvements-v2-2-d7a57d35558b@bootlin.com Signed-off-by: Luca Ceresoli Reviewed-by: Stephen Boyd Cc: Alexis Lothoré (eBPF Foundation) Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index e6b38ab7c8c5..bac7ea8ee24f 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -5,7 +5,8 @@ usage() { echo "Usage:" - echo " $0 -r | [|auto] []" + echo " $0 -r " + echo " $0 [ [|auto []]]" } # Try to find a Rust demangler From 7e1083598909f0fda82a0bf8cf788524ce4fccff Mon Sep 17 
00:00:00 2001 From: Luca Ceresoli Date: Fri, 23 Aug 2024 10:27:44 +0200 Subject: [PATCH 078/103] scripts/decode_stacktrace.sh: add '-h' flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When no parameters are passed, the usage instructions are presented only when debuginfod-find is not found. This makes sense because with debuginfod none of the positional parameters are needed. However it means that users having debuginfod-find installed will have no chance of reading the usage text without opening the file. Many programs have a '-h' flag to get the usage, so add such a flag. Invoking 'scripts/decode_stacktrace.sh -h' will now show the usage text and exit. Link: https://lkml.kernel.org/r/20240823-decode_stacktrace-find_module-improvements-v2-3-d7a57d35558b@bootlin.com Signed-off-by: Luca Ceresoli Reviewed-by: Stephen Boyd Cc: Alexis Lothoré (eBPF Foundation) Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index bac7ea8ee24f..826836d264c6 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -7,6 +7,7 @@ usage() { echo "Usage:" echo " $0 -r " echo " $0 [ [|auto []]]" + echo " $0 -h" } # Try to find a Rust demangler @@ -33,7 +34,10 @@ READELF=${UTIL_PREFIX}readelf${UTIL_SUFFIX} ADDR2LINE=${UTIL_PREFIX}addr2line${UTIL_SUFFIX} NM=${UTIL_PREFIX}nm${UTIL_SUFFIX} -if [[ $1 == "-r" ]] ; then +if [[ $1 == "-h" ]] ; then + usage + exit 0 +elif [[ $1 == "-r" ]] ; then vmlinux="" basepath="auto" modpath="" From e16c7b07784f3fb03025939c4590b9a7c64970a7 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 27 Aug 2024 19:23:08 +0800 Subject: [PATCH 079/103] kthread: fix task state in kthread worker if being frozen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 
analyzing a kernel warning message, Peter pointed out that there is a race condition when the kworker is being frozen and falls into try_to_freeze() with TASK_INTERRUPTIBLE, which could trigger a might_sleep() warning in try_to_freeze(). Although the root cause is not related to freeze()[1], it is still worthy to fix this issue ahead. One possible race scenario: CPU 0 CPU 1 ----- ----- // kthread_worker_fn set_current_state(TASK_INTERRUPTIBLE); suspend_freeze_processes() freeze_processes static_branch_inc(&freezer_active); freeze_kernel_threads pm_nosig_freezing = true; if (work) { //false __set_current_state(TASK_RUNNING); } else if (!freezing(current)) //false, been frozen freezing(): if (static_branch_unlikely(&freezer_active)) if (pm_nosig_freezing) return true; schedule() } // state is still TASK_INTERRUPTIBLE try_to_freeze() might_sleep() <--- warning Fix this by explicitly set the TASK_RUNNING before entering try_to_freeze(). Link: https://lore.kernel.org/lkml/Zs2ZoAcUsZMX2B%2FI@chenyu5-mobl2/ [1] Link: https://lkml.kernel.org/r/20240827112308.181081-1-yu.c.chen@intel.com Fixes: b56c0d8937e6 ("kthread: implement kthread_worker") Signed-off-by: Chen Yu Suggested-by: Peter Zijlstra Suggested-by: Andrew Morton Cc: Andreas Gruenbacher Cc: David Gow Cc: Mateusz Guzik Cc: Mickaël Salaün Cc: Tejun Heo Signed-off-by: Andrew Morton --- kernel/kthread.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index f7be976ff88a..db4ceb0f503c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -845,8 +845,16 @@ repeat: * event only cares about the address. */ trace_sched_kthread_work_execute_end(work, func); - } else if (!freezing(current)) + } else if (!freezing(current)) { schedule(); + } else { + /* + * Handle the case where the current remains + * TASK_INTERRUPTIBLE. try_to_freeze() expects + * the current to be TASK_RUNNING. 
+ */ + __set_current_state(TASK_RUNNING); + } try_to_freeze(); cond_resched(); From 32cebfe1cc21a84e4a907575bb9fd1c3f6b091fd Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:15 +0800 Subject: [PATCH 080/103] lib/string_choices: add str_true_false()/str_false_true() helper Add str_true_false()/str_false_true() helper to return a "true" or "false" string literal. We found more than 10 cases currently exist in the tree. So these helpers can be used for these cases. This patch (of 3): Add str_true_false()/str_false_true() helper to return "true" or "false" string literal. Link: https://lkml.kernel.org/r/20240827024517.914100-1-lihongbo22@huawei.com Link: https://lkml.kernel.org/r/20240827024517.914100-2-lihongbo22@huawei.com Signed-off-by: Hongbo Li Cc: Andy Shevchenko Cc: Anna Schumaker Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Matthew Wilcox Cc: Trond Myklebust Signed-off-by: Andrew Morton --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index d9ebe20229f8..4a2432313b8e 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,6 +42,12 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) + /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization From 01b58b1763b36d5597c9fbd6951d7e0386eebfb0 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:16 +0800 Subject: [PATCH 081/103] mm: make use of str_true_false helper The helper str_true_false() was introduced to return "true/false" string literal. We can simplify this format by str_true_false. 
Link: https://lkml.kernel.org/r/20240827024517.914100-3-lihongbo22@huawei.com Signed-off-by: Hongbo Li Cc: Andy Shevchenko Cc: Anna Schumaker Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Matthew Wilcox Cc: Trond Myklebust Signed-off-by: Andrew Morton --- mm/memory-tiers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 4775b3a3dabe..af9f5b0d6f1b 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -921,8 +921,7 @@ bool numa_demotion_enabled = false; static ssize_t demotion_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sysfs_emit(buf, "%s\n", - numa_demotion_enabled ? "true" : "false"); + return sysfs_emit(buf, "%s\n", str_true_false(numa_demotion_enabled)); } static ssize_t demotion_enabled_store(struct kobject *kobj, From 093ebfbbf3ba2002f7242b5c17a1845917f09cb5 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 27 Aug 2024 10:45:17 +0800 Subject: [PATCH 082/103] nfs make use of str_false_true helper The helper str_false_true() was introduced to return "false/true" string literal. We can simplify this format by str_false_true. Link: https://lkml.kernel.org/r/20240827024517.914100-4-lihongbo22@huawei.com Signed-off-by: Hongbo Li Cc: Andy Shevchenko Cc: Anna Schumaker Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Trond Myklebust Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- fs/nfs/nfs4xdr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7704a4509676..61190d6a5a77 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3447,7 +3447,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; } - dprintk("%s: link support=%s\n", __func__, *res == 0 ? 
"false" : "true"); + dprintk("%s: link support=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3465,7 +3465,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; } - dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: symlink support=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3607,7 +3607,7 @@ static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE; } - dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: case_insensitive=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3625,7 +3625,7 @@ static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING; } - dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: case_preserving=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -4333,8 +4333,7 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT; } - dprintk("%s: XATTR support=%s\n", __func__, - *res == 0 ? "false" : "true"); + dprintk("%s: XATTR support=%s\n", __func__, str_false_true(*res == 0)); return 0; } From 9abca1a71c0e5f78e7ce91af8ad03e8d9893dc54 Mon Sep 17 00:00:00 2001 From: Huang Xiaojia Date: Tue, 27 Aug 2024 02:41:09 +0900 Subject: [PATCH 083/103] nilfs2: use common implementation of file type Patch series "nilfs2: assorted cleanups". This is a collection of cleanup patches, with only the last three focused on the log writer thread, the rest are miscellaneous. 
Patches 1/8, 4/8, and 7/8 adopt common implementations, 2/8 uses a generic macro, 5/8 removes dead code, 6/8 removes an unnecessary reference, and 3/8 and 8/8 each simplify a particular messy implementation. This patch (of 8): Deduplicate the nilfs2 file type conversion implementation. Link: https://lkml.kernel.org/r/20240826174116.5008-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240815013442.1220909-1-huangxiaojia2@huawei.com Link: https://lkml.kernel.org/r/20240826174116.5008-2-konishi.ryusuke@gmail.com Signed-off-by: Huang Xiaojia Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/dir.c | 44 +++++--------------------------------------- 1 file changed, 5 insertions(+), 39 deletions(-) diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 4a29b0138d75..ba6bc6efcf11 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -231,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; @@ -297,10 +266,7 
@@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { @@ -444,7 +410,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, err = nilfs_prepare_chunk(folio, from, to); BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } @@ -531,7 +497,7 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, folio->mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); @@ -612,14 +578,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); kunmap_local(kaddr); nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: From 21176c0ae4ac9ab5ca7e1b7e6c234dee2a0022f0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:10 +0900 Subject: [PATCH 084/103] nilfs2: use the BITS_PER_LONG macro The macros NILFS_BMAP_KEY_BIT and NILFS_BMAP_NEW_PTR_INIT calculate, within their 
definitions, the number of bits in an unsigned long variable. Use the BITS_PER_LONG macro to make them simpler. Link: https://lkml.kernel.org/r/20240826174116.5008-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/bmap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 5f8c8c552620..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -87,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { From d7cee0b342cd90abe5d09976a69d1a22ad0c3441 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:11 +0900 Subject: [PATCH 085/103] nilfs2: separate inode type information from i_state field In nilfs_iget_locked() and nilfs_ilookup(), which are used to find or obtain nilfs2 inodes, the nilfs_iget_args structure used to identify inodes has type information divided into multiple booleans, making type determination complicated. Simplify inode type determination by consolidating inode type information into an unsigned integer represented by a combination of flags and by separating the type identification information for on-memory inodes from the i_state member in the nilfs_inode_info structure. 
Link: https://lkml.kernel.org/r/20240826174116.5008-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 56 ++++++++++++--------------------------------- fs/nilfs2/nilfs.h | 15 +++++++++--- fs/nilfs2/segment.c | 2 +- fs/nilfs2/super.c | 1 + 4 files changed, 28 insertions(+), 46 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index c39bc940e6f2..35f966cb4ece 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -28,17 +28,13 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag - * @for_btnc: inode for B-tree node cache flag - * @for_shadow: inode for shadowed page cache flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - bool for_gc; - bool for_btnc; - bool for_shadow; + unsigned int type; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -315,8 +311,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -343,6 +338,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); @@ -546,23 +542,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (test_bit(NILFS_I_BTNC, &ii->i_state)) { - if (!args->for_btnc) - return 0; - } else if (args->for_btnc) { + if (ii->i_type != args->type) return 0; - } - if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { - if (!args->for_shadow) - return 0; - } 
else if (args->for_shadow) { - return 0; - } - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -572,15 +555,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); - - if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - if (args->for_btnc) - NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); - if (args->for_shadow) - NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -588,8 +565,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -599,8 +575,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -631,8 +606,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -677,9 +651,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.ino = 
inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; - args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; - args.for_btnc = true; - args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -733,8 +705,8 @@ void nilfs_detach_btree_node_cache(struct inode *inode) struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { - .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = true + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; @@ -900,7 +872,7 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3097490b6621..fb1c4c5bae7c 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -22,6 +22,7 @@ /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that inidicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping @@ -37,6 +38,7 @@ */ struct nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; @@ -90,9 +92,16 @@ enum { NILFS_I_UPDATED, /* The file has been written back */ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_BTNC, /* inode for btree node cache */ - NILFS_I_SHADOW, /* inode for shadowed page cache */ +}; + 
+/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2a771e222d86..e4ec36d66607 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -519,7 +519,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8eb8dbc9f51c..eca79cca3803 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -160,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; From 9860f434056e6358ac63e7068254aeaf339cf71e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:12 +0900 Subject: [PATCH 086/103] nilfs2: eliminate the shared counter and spinlock for i_generation Use get_random_u32() as the source for inode->i_generation for new inodes, and eliminate the original source, the shared counter ns_next_generation along with its exclusive access spinlock ns_next_gen_lock. 
Link: https://lkml.kernel.org/r/20240826174116.5008-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 6 ++---- fs/nilfs2/the_nilfs.c | 5 ----- fs/nilfs2/the_nilfs.h | 6 ------ 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 35f966cb4ece..3c4a0577bc71 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -320,7 +321,6 @@ static int nilfs_insert_inode_locked(struct inode *inode, struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -381,9 +381,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); + inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index e44dde57ab65..ac03fd3c330c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include "nilfs.h" @@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb) INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -754,9 +752,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) nilfs->ns_blocksize_bits = 
sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 1e829ed7b0ef..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -71,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -161,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; From 33d23d849610df330ffb02420df705730a79f8e8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:13 +0900 Subject: [PATCH 087/103] nilfs2: do not repair reserved inode bitmap in nilfs_new_inode() After commit 93aef9eda1ce ("nilfs2: fix incorrect inode allocation from reserved inodes") is applied, the inode number returned by nilfs_ifile_create_inode() is guaranteed to always be greater than or equal to NILFS_USER_INO, so if the inode number is a reserved inode number (less than NILFS_USER_INO), the code to repair the bitmap immediately following it is no longer executed. So, delete it. 
Link: https://lkml.kernel.org/r/20240826174116.5008-6-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 3c4a0577bc71..3742baec4920 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -345,19 +345,6 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - - if (unlikely(ino < NILFS_USER_INO)) { - nilfs_warn(sb, - "inode bitmap is inconsistent for reserved inodes"); - do { - brelse(bh); - err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - } while (ino < NILFS_USER_INO); - - nilfs_info(sb, "repaired inode bitmap for reserved inodes"); - } ii->i_bh = bh; atomic64_inc(&root->inodes_count); From cfdfe9e17c4142cb556f34f25f93f4f85ca494f0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:14 +0900 Subject: [PATCH 088/103] nilfs2: remove sc_timer_task After commit f5d4e04634c9 ("nilfs2: fix use-after-free of timer for log writer thread") is applied, nilfs_construct_timeout(), which is called by a timer and wakes up the log writer thread, is never called after the log writer thread has terminated. As a result, the member variable "sc_timer_task" of the "nilfs_sc_info" structure, which was added when timer_setup() was adopted to retain a reference to the log writer thread's task even after it had terminated, is no longer needed, as it should be; we can simply use "sc_task" instead, which holds a reference to the log writer thread's task for its lifetime. So, eliminate "sc_timer_task" by this means. 
Link: https://lkml.kernel.org/r/20240826174116.5008-7-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 3 +-- fs/nilfs2/segment.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e4ec36d66607..ed14e5d4a354 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2502,7 +2502,7 @@ static void nilfs_construction_timeout(struct timer_list *t) { struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); - wake_up_process(sci->sc_timer_task); + wake_up_process(sci->sc_task); } static void @@ -2640,7 +2640,6 @@ static int nilfs_segctor_thread(void *arg) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; - sci->sc_timer_task = current; timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); /* start sync. */ diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 2499721ebcc9..7d1160a266df 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -115,7 +115,6 @@ struct nilfs_segsum_pointer { * @sc_lseg_stime: Start time of the latest logical segment * @sc_watermark: Watermark for the number of dirty buffers * @sc_timer: Timer for segctord - * @sc_timer_task: Thread woken up by @sc_timer * @sc_task: current thread of segctord */ struct nilfs_sc_info { @@ -172,7 +171,6 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; - struct task_struct *sc_timer_task; struct task_struct *sc_task; }; From 3f66cc261ccb54a8e4d8d5aa51c389c19453b00c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:15 +0900 Subject: [PATCH 089/103] nilfs2: use kthread_create and kthread_stop for the log writer thread By using kthread_create() and kthread_stop() to start and stop the log writer thread, eliminate custom thread start and stop helpers, as well as the wait queue "sc_wait_task" on the "nilfs_sc_info" struct and NILFS_SEGCTOR_QUIT flag that exist only to 
implement them. Also, update the kernel doc comments of the changed functions as appropriate. Link: https://lkml.kernel.org/r/20240826174116.5008-8-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 82 +++++++++++++++++---------------------------- fs/nilfs2/segment.h | 3 -- 2 files changed, 31 insertions(+), 54 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index ed14e5d4a354..4ff219f90f47 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2628,11 +2628,15 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { @@ -2640,11 +2644,6 @@ static int nilfs_segctor_thread(void *arg) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - - /* start sync. */ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ nilfs_info(sci->sc_super, "segctord starting. 
Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); @@ -2655,7 +2654,7 @@ static int nilfs_segctor_thread(void *arg) for (;;) { int mode; - if (sci->sc_state & NILFS_SEGCTOR_QUIT) + if (kthread_should_stop()) goto end_thread; if (timeout || sci->sc_seq_request != sci->sc_seq_done) @@ -2709,41 +2708,10 @@ static int nilfs_segctor_thread(void *arg) /* end sync. */ sci->sc_task = NULL; timer_shutdown_sync(&sci->sc_timer); - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; } -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - nilfs_err(sci->sc_super, "error %d creating segctord thread", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); - return 0; -} - -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2764,7 +2732,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); @@ -2819,8 +2786,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + kthread_stop(sci->sc_task); + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & 
NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); @@ -2868,14 +2839,15 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { @@ -2888,15 +2860,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (unlikely(err)) + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); nilfs_detach_log_writer(sb); + return err; + } + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - return err; + wake_up_process(sci->sc_task); + return 0; } /** diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 7d1160a266df..f723f47ddc4e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -105,7 +105,6 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * 
@sc_seq_request: Request counter * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter @@ -158,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -191,7 +189,6 @@ enum { }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* From 74b0099340e0be96b37f5f8b0b5d02b48bb25a2b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:16 +0900 Subject: [PATCH 090/103] nilfs2: refactor nilfs_segctor_thread() Simplify nilfs_segctor_thread(), the main loop function of the log writer thread, to make the basic structure easier to understand. In particular, the acquisition and release of the sc_state_lock spinlock was scattered throughout the function, so extract the determination of whether log writing is required into a helper function and make the spinlock lock sections clearer. Link: https://lkml.kernel.org/r/20240826174116.5008-9-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 87 +++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 4ff219f90f47..7c99d71204f1 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2627,6 +2627,32 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) return SC_LSEG_SR; } +/** + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. 
+ */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + /** * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. @@ -2642,70 +2668,39 @@ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; nilfs_info(sci->sc_super, "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); set_freezable(); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { + + while (!kthread_should_stop()) { + DEFINE_WAIT(wait); + bool should_write; int mode; - if (kthread_should_stop()) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (sci->sc_flush_request) - mode = nilfs_segctor_flush_mode(sci); - else - break; - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { - DEFINE_WAIT(wait); - int should_sleep = 1; + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - 
sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); - } - goto loop; - end_thread: + if (should_write) + nilfs_segctor_thread_construct(sci, mode); + } + /* end sync. */ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; timer_shutdown_sync(&sci->sc_timer); spin_unlock(&sci->sc_state_lock); From 7b0a5b666959719043123a8882bae49ec699d948 Mon Sep 17 00:00:00 2001 From: Alok Swaminathan Date: Mon, 26 Aug 2024 11:57:09 -0400 Subject: [PATCH 091/103] lib: glob.c: added null check for character class Add null check for character class. Previously, an inverted character class could result in a nul byte being matched and lead to the function reading past the end of the inputted string. 
Link: https://lkml.kernel.org/r/20240826155709.12383-1-swaminathanalok@gmail.com Signed-off-by: Alok Swaminathan Signed-off-by: Andrew Morton --- lib/glob.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/glob.c b/lib/glob.c index 15b73f490720..aa57900d2062 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -68,6 +68,8 @@ bool __pure glob_match(char const *pat, char const *str) back_str = --str; /* Allow zero-length match */ break; case '[': { /* Character class */ + if (c == '\0') /* No possible match */ + return false; bool match = false, inverted = (*pat == '!'); char const *class = pat + inverted; unsigned char a = *class++; From 2a1eb111d2859b13e5ff87baa6cfe343317b07bf Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 30 Aug 2024 11:10:50 +0200 Subject: [PATCH 092/103] squashfs: fix percpu address space issues in decompressor_multi_percpu.c When strict percpu address space checks are enabled, then current direct casts between the percpu address space and the generic address space fail the compilation on x86_64 with: decompressor_multi_percpu.c: In function `squashfs_decompressor_create': decompressor_multi_percpu.c:49:16: error: cast to generic address space pointer from disjoint `__seg_gs' address space pointer decompressor_multi_percpu.c: In function `squashfs_decompressor_destroy': decompressor_multi_percpu.c:64:25: error: cast to `__seg_gs' address space pointer from disjoint generic address space pointer decompressor_multi_percpu.c: In function `squashfs_decompress': decompressor_multi_percpu.c:82:25: error: cast to `__seg_gs' address space pointer from disjoint generic address space pointer Add intermediate casts to unsigned long, as advised in [1] and [2]. Side note: sparse still requires __force when casting from the percpu address space, although the documentation [2] allows casts to unsigned long without __force attribute. Found by GCC's named address space checks. There were no changes in the resulting object file. 
[1] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces [2] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name Link: https://lkml.kernel.org/r/20240830091104.13049-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/decompressor_multi_percpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 8a218e7c2390..e4d7e507b268 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -46,7 +46,7 @@ static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, } kfree(comp_opts); - return (__force void *) percpu; + return (void *)(__force unsigned long) percpu; out: for_each_possible_cpu(cpu) { @@ -61,7 +61,7 @@ out: static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) { struct squashfs_stream __percpu *percpu = - (struct squashfs_stream __percpu *) msblk->stream; + (void __percpu *)(unsigned long) msblk->stream; struct squashfs_stream *stream; int cpu; @@ -79,7 +79,7 @@ static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, { struct squashfs_stream *stream; struct squashfs_stream __percpu *percpu = - (struct squashfs_stream __percpu *) msblk->stream; + (void __percpu *)(unsigned long) msblk->stream; int res; local_lock(&percpu->lock); From 0aa75a2b3fafccc875d260e190b14faf5a856d45 Mon Sep 17 00:00:00 2001 From: zhangjiao Date: Thu, 29 Aug 2024 12:20:08 +0800 Subject: [PATCH 093/103] tools/mm: rm thp_swap_allocator_test when make clean rm thp_swap_allocator_test when make clean Link: https://lkml.kernel.org/r/20240829042008.6937-1-zhangjiao2@cmss.chinamobile.com Signed-off-by: zhangjiao Signed-off-by: Andrew Morton --- tools/mm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 
15791c1c5b28..f5725b5c23aa 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -23,7 +23,7 @@ $(LIBS): $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) clean: - $(RM) page-types slabinfo page_owner_sort + $(RM) page-types slabinfo page_owner_sort thp_swap_allocator_test make -C $(LIB_DIR) clean sbindir ?= /usr/sbin From 546f02823df82cddc411e8db236d296a51308dfa Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 28 Aug 2024 15:23:40 +0800 Subject: [PATCH 094/103] user_namespace: use kmemdup_array() instead of kmemdup() for multiple allocation Let the kmemdup_array() take care about multiplication and possible overflows. Link: https://lkml.kernel.org/r/20240828072340.1249310-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Reviewed-by: Kees Cook Cc: Alexey Dobriyan Cc: Christian Brauner Cc: Li zeming Cc: Randy Dunlap Signed-off-by: Andrew Morton --- kernel/user_namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0b0b95418b16..aa0b2e47f2f2 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -853,9 +853,8 @@ static int sort_idmaps(struct uid_gid_map *map) cmp_extents_forward, NULL); /* Only copy the memory from forward we actually need. */ - map->reverse = kmemdup(map->forward, - map->nr_extents * sizeof(struct uid_gid_extent), - GFP_KERNEL); + map->reverse = kmemdup_array(map->forward, map->nr_extents, + sizeof(struct uid_gid_extent), GFP_KERNEL); if (!map->reverse) return -ENOMEM; From 9403001ad65ae4f4c5de368bdda3a0636b51d51a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:07 +0900 Subject: [PATCH 095/103] nilfs2: fix potential null-ptr-deref in nilfs_btree_insert() Patch series "nilfs2: fix potential issues with empty b-tree nodes". This series addresses three potential issues with empty b-tree nodes that can occur with corrupted filesystem images, including one recently discovered by syzbot. 
This patch (of 3): If a b-tree is broken on the device, and the b-tree height is greater than 2 (the level of the root node is greater than 1) even if the number of child nodes of the b-tree root is 0, a NULL pointer dereference occurs in nilfs_btree_prepare_insert(), which is called from nilfs_btree_insert(). This is because, when the number of child nodes of the b-tree root is 0, nilfs_btree_do_lookup() does not set the block buffer head in any of path[x].bp_bh, leaving it as the initial value of NULL, but if the level of the b-tree root node is greater than 1, nilfs_btree_get_nonroot_node(), which accesses the buffer memory of path[x].bp_bh, is called. Fix this issue by adding a check to nilfs_btree_root_broken(), which performs sanity checks when reading the root node from the device, to detect this inconsistency. Thanks to Lizhi Xu for trying to solve the bug and clarifying the cause early on. Link: https://lkml.kernel.org/r/20240904081401.16682-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240902084101.138971-1-lizhi.xu@windriver.com Link: https://lkml.kernel.org/r/20240904081401.16682-2-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+9bff4c7b992038a7409f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bff4c7b992038a7409f Cc: Lizhi Xu Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 862bdf23120e..d390b8ba00d4 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || - nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { 
nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); From 111b812d3662f3a1b831d19208f83aa711583fe6 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:08 +0900 Subject: [PATCH 096/103] nilfs2: determine empty node blocks as corrupted Due to the nature of b-trees, nilfs2 itself and admin tools such as mkfs.nilfs2 will never create an intermediate b-tree node block with 0 child nodes, nor will they delete (key, pointer)-entries that would result in such a state. However, it is possible that a b-tree node block is corrupted on the backing device and is read with 0 child nodes. Because operation is not guaranteed if the number of child nodes is 0 for intermediate node blocks other than the root node, modify nilfs_btree_node_broken(), which performs sanity checks when reading a b-tree node block, so that such cases will be judged as metadata corruption. Link: https://lkml.kernel.org/r/20240904081401.16682-3-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Cc: Lizhi Xu Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index d390b8ba00d4..dedd3c480842 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", From f9c96351aa6718b42a9f42eaf7adce0356bdb5e8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:09 +0900 Subject: [PATCH 097/103] nilfs2: fix potential oob read in 
nilfs_btree_check_delete() The function nilfs_btree_check_delete(), which checks whether degeneration to direct mapping occurs before deleting a b-tree entry, causes memory access outside the block buffer when retrieving the maximum key if the root node has no entries. This does not usually happen because b-tree mappings with 0 child nodes are never created by mkfs.nilfs2 or nilfs2 itself. However, it can happen if the b-tree root node read from a device is configured that way, so fix this potential issue by adding a check for that case. Link: https://lkml.kernel.org/r/20240904081401.16682-4-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Cc: Lizhi Xu Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index dedd3c480842..ef5061bb56da 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1659,13 +1659,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1674,12 +1677,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? 
nilfs_btree_node_get_key(node, nchildren - 2) : 0; From fd127b155523bbfaa91a5872f4d93a80f70b8238 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 4 Sep 2024 19:16:03 +0900 Subject: [PATCH 098/103] nilfs2: remove duplicate 'unlikely()' usage Nested unlikely() calls, IS_ERR already uses unlikely() internally Link: https://lkml.kernel.org/r/20240904101618.17716-1-konishi.ryusuke@gmail.com Signed-off-by: Kunwu Chan Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 7797903e014e..9c0b7cddeaae 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -262,7 +262,7 @@ repeat: NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); dfolio = filemap_grab_folio(dmap, folio->index); - if (unlikely(IS_ERR(dfolio))) { + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ folio_unlock(folio); err = PTR_ERR(dfolio); From 73b4fcab4905e17f2a22af278efd75f1cd008e29 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 4 Sep 2024 15:10:04 +0800 Subject: [PATCH 099/103] ocfs2: cleanup return value and mlog in ocfs2_global_read_info() Return 0 instead of sizeof(ocfs2_global_disk_dqinfo) that .quota_read returns in normal case. Also cleanup mlog to make code more readable. 
Link: https://lkml.kernel.org/r/20240904071004.2067695-2-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/quota_global.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 0575c2d060eb..2b0daced98eb 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -371,12 +371,16 @@ int ocfs2_global_read_info(struct super_block *sb, int type) status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, &pcount, NULL); - if (status < 0) + if (status < 0) { + mlog_errno(status); goto out_unlock; + } status = ocfs2_qinfo_lock(oinfo, 0); - if (status < 0) + if (status < 0) { + mlog_errno(status); goto out_unlock; + } status = sb->s_op->quota_read(sb, type, (char *)&dinfo, sizeof(struct ocfs2_global_disk_dqinfo), OCFS2_GLOBAL_INFO_OFF); @@ -404,12 +408,11 @@ int ocfs2_global_read_info(struct super_block *sb, int type) schedule_delayed_work(&oinfo->dqi_sync_work, msecs_to_jiffies(oinfo->dqi_syncms)); -out_err: - return status; + return 0; out_unlock: ocfs2_unlock_global_qf(oinfo, 0); - mlog_errno(status); - goto out_err; +out_err: + return status; } /* Write information to global quota file. Expects exclusive lock on quota From dab2214fec6057d2ba816bba1d30aca73c08ad68 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 6 Sep 2024 13:40:08 +0800 Subject: [PATCH 100/103] treewide: correct the typo 'retun' There are some spelling mistakes of 'retun' in comments, which should instead be 'return'.
Link: https://lkml.kernel.org/r/63D0F870EE8E87A0+20240906054008.390188-1-wangyuli@uniontech.com Signed-off-by: WangYuli Signed-off-by: Andrew Morton --- arch/arm/mach-omap2/omap-mpuss-lowpower.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h | 2 +- drivers/infiniband/core/sa_query.c | 2 +- drivers/input/misc/wistron_btns.c | 2 +- drivers/mtd/nand/raw/nandsim.c | 2 +- drivers/scsi/bfa/bfa_fcs.c | 2 +- drivers/scsi/pmcraid.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 7ad74db951f6..f18ef45e2fe1 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -333,7 +333,7 @@ int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state) omap_pm_ops.scu_prepare(cpu, power_state); /* - * CPU never retuns back if targeted power state is OFF mode. + * CPU never returns back if targeted power state is OFF mode. * CPU ONLINE follows normal CPU ONLINE ptah via * omap4_secondary_startup(). */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h index b26d5fe40c72..febc3e764a63 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h @@ -231,7 +231,7 @@ struct dpu_crtc_state { container_of(x, struct dpu_crtc_state, base) /** - * dpu_crtc_frame_pending - retun the number of pending frames + * dpu_crtc_frame_pending - return the number of pending frames * @crtc: Pointer to drm crtc object */ static inline int dpu_crtc_frame_pending(struct drm_crtc *crtc) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8175dde60b0a..53571e6b3162 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1420,7 +1420,7 @@ enum opa_pr_supported { /* * opa_pr_query_possible - Check if current PR query can be an OPA query. 
* - * Retuns PR_NOT_SUPPORTED if a path record query is not + * Returns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record * query is possible. diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index 5c4956678cd0..39d6f642cd19 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -1075,7 +1075,7 @@ static void wistron_led_init(struct device *parent) } if (leds_present & FE_MAIL_LED) { - /* bios_get_default_setting(MAIL) always retuns 0, so just turn the led off */ + /* bios_get_default_setting(MAIL) always returns 0, so just turn the led off */ wistron_mail_led.brightness = LED_OFF; if (led_classdev_register(parent, &wistron_mail_led)) leds_present &= ~FE_MAIL_LED; diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index 179b28459b4b..df48b7d01d16 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -1381,7 +1381,7 @@ static inline union ns_mem *NS_GET_PAGE(struct nandsim *ns) } /* - * Retuns a pointer to the current byte, within the current page. + * Returns a pointer to the current byte, within the current page. 
*/ static inline u_char *NS_PAGE_BYTE_OFF(struct nandsim *ns) { diff --git a/drivers/scsi/bfa/bfa_fcs.c b/drivers/scsi/bfa/bfa_fcs.c index 5023c0ab4277..e52ce9b01f49 100644 --- a/drivers/scsi/bfa/bfa_fcs.c +++ b/drivers/scsi/bfa/bfa_fcs.c @@ -1431,7 +1431,7 @@ bfa_cb_lps_flogo_comp(void *bfad, void *uarg) * param[in] vf_id - VF_ID * * return - * If lookup succeeds, retuns fcs vf object, otherwise returns NULL + * If lookup succeeds, returns fcs vf object, otherwise returns NULL */ bfa_fcs_vf_t * bfa_fcs_vf_lookup(struct bfa_fcs_s *fcs, u16 vf_id) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index a2a084c8075e..72a4c6e3d0c8 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4009,7 +4009,7 @@ static void pmcraid_tasklet_function(unsigned long instance) * This routine un-registers registered interrupt handler and * also frees irqs/vectors. * - * Retun Value + * Return Value * None */ static From 13309764720624caf2c5afba99b198f3f9fcd9f0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 8 Sep 2024 12:26:22 +0300 Subject: [PATCH 101/103] proc: use __auto_type more Switch away from quite chatty declarations using typeof_member(). In theory this is faster to compile too because there is no macro expansion and there is less type checking. 
Link: https://lkml.kernel.org/r/81bf02fd-8724-4f4d-a2bb-c59620b7d716@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/inode.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d19434e2a58e..626ad7bd94f2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -303,9 +303,7 @@ static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter) static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) { - typeof_member(struct proc_ops, proc_read) read; - - read = pde->proc_ops->proc_read; + __auto_type read = pde->proc_ops->proc_read; if (read) return read(file, buf, count, ppos); return -EIO; @@ -327,9 +325,7 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - typeof_member(struct proc_ops, proc_write) write; - - write = pde->proc_ops->proc_write; + __auto_type write = pde->proc_ops->proc_write; if (write) return write(file, buf, count, ppos); return -EIO; @@ -351,9 +347,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts) { - typeof_member(struct proc_ops, proc_poll) poll; - - poll = pde->proc_ops->proc_poll; + __auto_type poll = pde->proc_ops->proc_poll; if (poll) return poll(file, pts); return DEFAULT_POLLMASK; @@ -375,9 +369,7 @@ static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { - typeof_member(struct proc_ops, proc_ioctl) ioctl; - - ioctl = pde->proc_ops->proc_ioctl; + __auto_type ioctl = pde->proc_ops->proc_ioctl; if (ioctl) return ioctl(file, cmd, arg); 
return -ENOTTY; @@ -400,9 +392,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne #ifdef CONFIG_COMPAT static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { - typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl; - - compat_ioctl = pde->proc_ops->proc_compat_ioctl; + __auto_type compat_ioctl = pde->proc_ops->proc_compat_ioctl; if (compat_ioctl) return compat_ioctl(file, cmd, arg); return -ENOTTY; @@ -424,9 +414,7 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma) { - typeof_member(struct proc_ops, proc_mmap) mmap; - - mmap = pde->proc_ops->proc_mmap; + __auto_type mmap = pde->proc_ops->proc_mmap; if (mmap) return mmap(file, vma); return -EIO; @@ -483,7 +471,6 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct proc_dir_entry *pde = PDE(inode); int rv = 0; typeof_member(struct proc_ops, proc_open) open; - typeof_member(struct proc_ops, proc_release) release; struct pde_opener *pdeo; if (!pde->proc_ops->proc_lseek) @@ -510,7 +497,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (!use_pde(pde)) return -ENOENT; - release = pde->proc_ops->proc_release; + __auto_type release = pde->proc_ops->proc_release; if (release) { pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); if (!pdeo) { @@ -547,9 +534,7 @@ static int proc_reg_release(struct inode *inode, struct file *file) struct pde_opener *pdeo; if (pde_is_permanent(pde)) { - typeof_member(struct proc_ops, proc_release) release; - - release = pde->proc_ops->proc_release; + __auto_type release = pde->proc_ops->proc_release; if (release) { return release(inode, file); } From e620799c414a035dea1208bcb51c869744931dbb Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Tue, 10 Sep 2024 12:35:31 +0800 Subject: [PATCH 102/103] list: test: fix tests for 
list_cut_position() Fix the tests for list_cut_position*() by adding the missing check of integer "i" after the second loop. The variable should be checked a second time to make sure both lists after the cut operation are formed as expected. The counter is deliberately not reset between the two loops: list1 holds the entries that follow the ones moved to list2, so the second loop keeps indexing entries[] from where the first loop stopped and "i" must equal the total number of entries at the end. Link: https://lkml.kernel.org/r/20240910043531.71343-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Cc: David Gow Signed-off-by: Andrew Morton --- lib/list-test.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/list-test.c b/lib/list-test.c index 37cbc33e9fdb..8d1d47a9fe9e 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -404,10 +404,13 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_EQ(test, i, 2); + /* no reset: list1 holds the entries after the cut */ list_for_each(cur, &list1) { KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_cut_before(struct kunit *test) @@ -432,10 +435,13 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_EQ(test, i, 1); + /* no reset: list1 holds the entries after the cut */ list_for_each(cur, &list1) { KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_splice(struct kunit *test) From 5e06e08939df1cafef97a8e04f4b88c2806b538a Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Tue, 10 Sep 2024 12:08:18 +0800 Subject: [PATCH 103/103] list: test: increase coverage of list_test_list_replace*() Increase the test coverage of list_test_list_replace*() by adding the checks to compare the pointer of "a_new.next" and "a_new.prev" to make sure a perfect circular doubly linked list is formed after the replacement.
Link: https://lkml.kernel.org/r/20240910040818.65723-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Cc: David Gow Signed-off-by: Andrew Morton --- lib/list-test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/list-test.c b/lib/list-test.c index 8d1d47a9fe9e..4f3dc75baec1 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -102,6 +102,8 @@ static void list_test_list_replace(struct kunit *test) /* now: [list] -> a_new -> b */ KUNIT_EXPECT_PTR_EQ(test, list.next, &a_new); KUNIT_EXPECT_PTR_EQ(test, b.prev, &a_new); + KUNIT_EXPECT_PTR_EQ(test, a_new.next, &b); + KUNIT_EXPECT_PTR_EQ(test, a_new.prev, &list); } static void list_test_list_replace_init(struct kunit *test) @@ -118,6 +120,8 @@ static void list_test_list_replace_init(struct kunit *test) /* now: [list] -> a_new -> b */ KUNIT_EXPECT_PTR_EQ(test, list.next, &a_new); KUNIT_EXPECT_PTR_EQ(test, b.prev, &a_new); + KUNIT_EXPECT_PTR_EQ(test, a_new.next, &b); + KUNIT_EXPECT_PTR_EQ(test, a_new.prev, &list); /* check a_old is empty (initialized) */ KUNIT_EXPECT_TRUE(test, list_empty_careful(&a_old));